// org-social-ios/Sources/OrgSocialKit/Parser/OrgSocialParser.swift

import Foundation

/// Parses the raw text of a `social.org` file into an `OrgSocialProfile`.
///
/// The parser is lenient: missing required fields (TITLE, NICK) produce `nil` values
/// rather than errors. Invalid property values (wrong format) are silently discarded,
/// matching the behaviour of the reference Elisp client.
///
/// Line endings are normalised before parsing, so files saved with CRLF (or lone CR)
/// terminators are handled the same way as LF-only files.
public struct OrgSocialParser: Sendable {

    /// Regex fragment (unanchored) for a post timestamp: date, `T`, time, and either a
    /// numeric offset (`+0200` / `+02:00`) or `Z`.
    private static let timestampPattern =
        #"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:?\d{2}|Z)"#

    public init() {}

    // MARK: - Public API

    /// Parses the raw UTF-8 content of a `social.org` file.
    ///
    /// Posts whose timestamp lies after the current time (`Date()`) are skipped.
    ///
    /// - Parameter content: Full text of the file.
    /// - Returns: A populated `OrgSocialProfile`. `feedURL` is always `nil` here;
    ///   set it on the result if you know the source URL.
    public func parse(_ content: String) -> OrgSocialProfile {
        parse(content, now: Date())
    }

    /// Parses the raw UTF-8 content of a `social.org` file against an explicit clock.
    ///
    /// - Parameters:
    ///   - content: Full text of the file.
    ///   - now: Reference instant for the scheduled-post filter; posts dated after
    ///     `now` are skipped. Pass a fixed value to get reproducible results.
    /// - Returns: A populated `OrgSocialProfile`. `feedURL` is always `nil` here;
    ///   set it on the result if you know the source URL.
    public func parse(_ content: String, now: Date) -> OrgSocialProfile {
        let lines = splitLines(content)
        let headers = parseHeaders(lines: lines)
        let posts = parsePosts(lines: lines, now: now)

        // `split` already omits empty pieces, so repeated spaces yield no empty codes.
        let languages: [String] = headers["LANGUAGE"]?.first
            .map { $0.split(separator: " ").map(String.init) } ?? []

        return OrgSocialProfile(
            title: headers["TITLE"]?.first,
            nick: headers["NICK"]?.first,
            description: headers["DESCRIPTION"]?.first,
            avatar: headers["AVATAR"]?.first.flatMap { URL(string: $0) },
            links: headers["LINK"]?.compactMap { URL(string: $0) } ?? [],
            location: headers["LOCATION"]?.first,
            birthday: headers["BIRTHDAY"]?.first,
            languages: languages,
            feedURL: nil,
            pinned: headers["PINNED"]?.first,
            follows: headers["FOLLOW"]?.compactMap { parseFollow($0) } ?? [],
            groups: headers["GROUP"]?.compactMap { parseGroup($0) } ?? [],
            contacts: headers["CONTACT"] ?? [],
            posts: posts
        )
    }

    // MARK: - Line splitting

    /// Splits `content` into lines after normalising CRLF and lone CR to LF.
    ///
    /// Without this a trailing `\r` survives on every line of a CRLF file;
    /// `.whitespaces` trimming does not remove it, so exact comparisons such as
    /// `== "* Posts"` or `== ":END:"` and the anchored regexes would all fail.
    private func splitLines(_ content: String) -> [String] {
        content
            .replacingOccurrences(of: "\r\n", with: "\n")
            .replacingOccurrences(of: "\r", with: "\n")
            .components(separatedBy: "\n")
    }

    // MARK: - Header parsing

    /// Extracts all `#+KEYWORD: value` entries from the header section.
    /// Stops at `* Posts` or any other top-level heading.
    /// Ignores lines inside `#+BEGIN_SRC`, `#+BEGIN_EXAMPLE`, and `#+BEGIN_QUOTE` blocks.
    ///
    /// - Parameter lines: All lines of the file.
    /// - Returns: Upper-cased keyword → values in file order. Keywords may repeat
    ///   (e.g. `FOLLOW`), hence the array. Entries with an empty value are dropped.
    private func parseHeaders(lines: [String]) -> [String: [String]] {
        var result: [String: [String]] = [:]
        var inBlock = false

        for line in lines {
            let t = line.trimmingCharacters(in: .whitespaces)
            let u = t.uppercased()

            if u.hasPrefix("#+BEGIN_SRC") || u.hasPrefix("#+BEGIN_EXAMPLE") || u.hasPrefix("#+BEGIN_QUOTE") {
                inBlock = true
                continue
            }
            if u.hasPrefix("#+END_SRC") || u.hasPrefix("#+END_EXAMPLE") || u.hasPrefix("#+END_QUOTE") {
                inBlock = false
                continue
            }
            if inBlock { continue }

            // Stop when we hit any top-level heading
            if isTopLevelHeading(t) { break }

            guard t.hasPrefix("#+"), let colonIdx = t.firstIndex(of: ":") else { continue }

            // Keyword is the text between "#+" and the first colon.
            let keyword = String(t[t.index(t.startIndex, offsetBy: 2)..<colonIdx]).uppercased()
            guard !keyword.isEmpty, keyword.allSatisfy({ $0.isLetter || $0 == "_" }) else { continue }

            let value = String(t[t.index(after: colonIdx)...]).trimmingCharacters(in: .whitespaces)
            if !value.isEmpty {
                result[keyword, default: []].append(value)
            }
        }
        return result
    }

    // MARK: - Follow / Group

    /// Parses a `#+FOLLOW:` value: either `"https://feed.url"` or `"nick https://feed.url"`.
    ///
    /// - Returns: `nil` when no http(s) URL is found in the first or second position.
    private func parseFollow(_ line: String) -> OrgSocialFollow? {
        let parts = line.split(separator: " ", omittingEmptySubsequences: true).map(String.init)
        guard !parts.isEmpty else { return nil }

        if isHTTP(parts[0]) {
            // Format: "https://feed.url"
            return URL(string: parts[0]).map { OrgSocialFollow(name: nil, url: $0) }
        }
        if parts.count >= 2, isHTTP(parts[1]), let url = URL(string: parts[1]) {
            // Format: "nick https://feed.url"
            return OrgSocialFollow(name: parts[0], url: url)
        }
        return nil
    }

    /// Parses a `#+GROUP:` value of the form `"<name> <relay url>"`.
    ///
    /// The URL is the text after the last space, so the name itself may contain spaces.
    private func parseGroup(_ line: String) -> OrgSocialGroup? {
        let t = line.trimmingCharacters(in: .whitespaces)
        guard let lastSpace = t.lastIndex(of: " ") else { return nil }
        let urlStr = String(t[t.index(after: lastSpace)...])
        let name = String(t[..<lastSpace]).trimmingCharacters(in: .whitespaces)
        guard !name.isEmpty, isHTTP(urlStr), let url = URL(string: urlStr) else { return nil }
        return OrgSocialGroup(name: name, relayURL: url)
    }

    // MARK: - Post parsing

    /// Splits the `* Posts` section into post blocks and parses each one.
    ///
    /// - Parameters:
    ///   - lines: All lines of the file.
    ///   - now: Reference instant for the scheduled-post filter.
    /// - Returns: Successfully parsed posts in file order; empty when there is no
    ///   `* Posts` heading.
    private func parsePosts(lines: [String], now: Date) -> [OrgSocialPost] {
        // Find the "* Posts" section
        guard let postsIdx = lines.firstIndex(where: { $0.trimmingCharacters(in: .whitespaces) == "* Posts" }) else {
            return []
        }

        // Everything after `* Posts` is the posts section. Org Social spec
        // defines no further top-level sections, and users are free to use
        // `* heading` lines inside post bodies — so we must NOT treat them
        // as section boundaries. Breaking on the first top-level heading
        // silently dropped every post that followed a body containing one.
        var blocks: [[String]] = []
        var current: [String] = []
        for line in lines[(postsIdx + 1)...] {
            if isPostHeading(line) {
                if !current.isEmpty { blocks.append(current) }
                current = [line]
            } else if !current.isEmpty {
                // Lines before the first post heading belong to no post and are
                // ignored, so every block really starts with its heading line.
                current.append(line)
            }
        }
        if !current.isEmpty { blocks.append(current) }

        return blocks.compactMap { parsePostBlock($0, now: now) }
    }

    /// Parses a single post block (array of lines starting with the `**` heading).
    ///
    /// - Parameters:
    ///   - lines: The heading line followed by the post's drawer and body lines.
    ///   - now: Posts dated after this instant are treated as scheduled and skipped.
    /// - Returns: `nil` when the block has no valid ID, the ID cannot be parsed into a
    ///   date, or the post is scheduled for the future.
    private func parsePostBlock(_ lines: [String], now: Date) -> OrgSocialPost? {
        guard !lines.isEmpty else { return nil }

        // --- ID from header line ---
        let headerRest = isPostHeading(lines[0])
            ? String(lines[0].dropFirst(2)).trimmingCharacters(in: .whitespaces)
            : ""
        let idFromHeader = extractLeadingTimestamp(from: headerRest)

        // --- Properties block ---
        // Only the FIRST drawer is honoured. Scanning the whole block would let a
        // literal `:END:` or `:PROPERTIES:` inside the body move the content start
        // (dropping the text before it) or overwrite real properties.
        var properties: [String: String] = [:]
        var idFromProperties: String?
        var contentStart = 1  // default if no complete :PROPERTIES: … :END: drawer
        var inProps = false

        for (index, line) in lines.enumerated().dropFirst() {
            let t = line.trimmingCharacters(in: .whitespaces)
            if !inProps {
                if t == ":PROPERTIES:" { inProps = true }
                continue
            }
            if t == ":END:" {
                // Body starts on the line right after the drawer terminator.
                contentStart = index + 1
                break
            }
            if let (key, val) = parsePropLine(t) {
                if key == "ID" {
                    idFromProperties = isValidTimestamp(val) ? val : nil
                } else {
                    properties[key] = val
                }
            }
        }

        // Header ID takes priority over property ID (spec v1.6)
        guard let timestamp = idFromHeader ?? idFromProperties else { return nil }
        guard let date = parseDate(timestamp) else { return nil }

        // Skip scheduled posts (future timestamps)
        guard date <= now else { return nil }

        // --- Text content ---
        let rawLines = contentStart < lines.count ? Array(lines[contentStart...]) : []
        let text = extractText(from: rawLines)

        // --- Validated properties ---
        return OrgSocialPost(
            timestamp: timestamp,
            date: date,
            text: text,
            lang: validateLang(properties["LANG"]),
            tags: parseTags(properties["TAGS"]),
            client: validateShortText(properties["CLIENT"]),
            replyTo: validateURLTimestamp(properties["REPLY_TO"]),
            include: validateURLTimestamp(properties["INCLUDE"]),
            pollEnd: properties["POLL_END"].flatMap { parseDate($0) },
            pollOption: validateShortText(properties["POLL_OPTION"]),
            group: validateGroupProp(properties["GROUP"]),
            mood: validateShortText(properties["MOOD"]),
            migration: properties["MIGRATION"],
            visibility: validateVisibility(properties["VISIBILITY"])
        )
    }

    /// Parses a single `:KEY: value` line from inside a `:PROPERTIES:` block.
    ///
    /// - Parameter trimmed: The line with surrounding whitespace already removed.
    /// - Returns: The upper-cased key and trimmed value, or `nil` when the line is not
    ///   a property line or either part is empty.
    private func parsePropLine(_ trimmed: String) -> (key: String, value: String)? {
        guard trimmed.hasPrefix(":"),
              trimmed != ":PROPERTIES:",
              trimmed != ":END:" else { return nil }

        let withoutFirst = String(trimmed.dropFirst(1))
        guard let secondColon = withoutFirst.firstIndex(of: ":") else { return nil }

        let key = String(withoutFirst[..<secondColon]).uppercased()
        let value = String(withoutFirst[withoutFirst.index(after: secondColon)...])
            .trimmingCharacters(in: .whitespaces)

        guard !key.isEmpty, !value.isEmpty else { return nil }
        return (key, value)
    }

    /// Extracts text content, filtering Org Mode comment and property lines.
    /// Mirrors the reference Elisp implementation filter rules.
    ///
    /// Any line whose first non-whitespace character is `#` or `:` is removed; the
    /// remaining lines are joined with `\n` and trimmed.
    private func extractText(from lines: [String]) -> String {
        lines
            .filter { line in
                let t = line.trimmingCharacters(in: .whitespaces)
                return !t.hasPrefix("#") && !t.hasPrefix(":")
            }
            .joined(separator: "\n")
            .trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - Property validation

    /// Accepts a lowercase language code such as `en` or `pt-br`; anything else yields `nil`.
    private func validateLang(_ s: String?) -> String? {
        guard let s else { return nil }
        let ok = s.range(of: #"^[a-z]{2,5}(-[a-z]{2,3})?$"#, options: .regularExpression) != nil
        return ok ? s : nil
    }

    /// Splits a whitespace-separated tag list; returns `[]` if any tag contains a
    /// character outside `[a-zA-Z0-9_-]`.
    private func parseTags(_ s: String?) -> [String] {
        guard let s else { return [] }
        let ok = s.range(of: #"^[a-zA-Z0-9_-]+(\s+[a-zA-Z0-9_-]+)*$"#, options: .regularExpression) != nil
        guard ok else { return [] }
        // The pattern allows any whitespace between tags, so split on whitespace too —
        // splitting on " " alone would return tab-separated tags as a single tag.
        return s.split(whereSeparator: \.isWhitespace).map(String.init)
    }

    /// Accepts single-line values shorter than 200 characters.
    private func validateShortText(_ s: String?) -> String? {
        guard let s, s.count < 200, !s.contains("\n"), !s.contains("\r") else { return nil }
        return s
    }

    /// Accepts `<http(s) url>#<timestamp>` references (used by `REPLY_TO` and `INCLUDE`).
    ///
    /// The timestamp part must carry a numeric offset or `Z`, in line with the post-ID
    /// patterns used elsewhere in this parser. The pattern is intentionally not
    /// end-anchored, so it stays as lenient about trailing text as before.
    private func validateURLTimestamp(_ s: String?) -> String? {
        guard let s else { return nil }
        let ok = s.range(
            of: #"^https?://.+#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}|Z)"#,
            options: .regularExpression
        ) != nil
        return ok ? s : nil
    }

    /// Accepts a post-level `GROUP` value: at least two space-separated parts, the last
    /// of which is an http(s) URL. The value is returned unchanged.
    private func validateGroupProp(_ s: String?) -> String? {
        guard let s else { return nil }
        let parts = s.split(separator: " ", omittingEmptySubsequences: true)
        guard parts.count >= 2, let last = parts.last, isHTTP(String(last)) else { return nil }
        return s
    }

    /// Accepts only the literal values `public` and `mention`.
    private func validateVisibility(_ s: String?) -> String? {
        guard let s else { return nil }
        return (s == "public" || s == "mention") ? s : nil
    }

    // MARK: - Date / timestamp utilities

    private func parseDate(_ timestamp: String) -> Date? {
        // Delegates to the shared PostWriter helper so emission and parsing
        // stay in sync (handles compact `+0200`, colon `+02:00`, and `Z`).
        PostWriter.parseTimestamp(timestamp)
    }

    /// Returns `true` if the whole string is a valid RFC 3339 post timestamp.
    private func isValidTimestamp(_ s: String) -> Bool {
        s.range(of: "^" + Self.timestampPattern + "$", options: .regularExpression) != nil
    }

    /// Extracts a leading RFC 3339 timestamp from the start of a string.
    /// Used when the ID is embedded in the `** ` heading.
    private func extractLeadingTimestamp(from s: String) -> String? {
        guard let range = s.range(of: "^" + Self.timestampPattern, options: .regularExpression) else {
            return nil
        }
        return String(s[range])
    }

    // MARK: - Line classification helpers

    /// Returns `true` for a top-level heading (`* ` but NOT `** `).
    ///
    /// A line starting with `"* "` has a space as its second character, so it can
    /// never also start with `"** "`; the single prefix test is sufficient.
    private func isTopLevelHeading(_ line: String) -> Bool {
        line.hasPrefix("* ")
    }

    /// Returns `true` for a level-2 heading: a bare `**`, or `**` followed by a space
    /// or tab.
    ///
    /// Deeper headings (`*** …`) and body text that merely starts with two asterisks
    /// (e.g. `**bold**`) are NOT headings; treating the latter as one would split a
    /// post in the middle of its body and lose the remainder.
    private func isPostHeading(_ line: String) -> Bool {
        guard line.hasPrefix("**") else { return false }
        guard let next = line.dropFirst(2).first else { return true }  // bare "**"
        return next == " " || next == "\t"
    }

    /// Returns `true` when `s` starts with `http://` or `https://`.
    private func isHTTP(_ s: String) -> Bool {
        s.hasPrefix("http://") || s.hasPrefix("https://")
    }
}