import Foundation

/// Parses the raw text of a `social.org` file into an `OrgSocialProfile`.
///
/// The parser is lenient: missing required fields (TITLE, NICK) produce `nil` values
/// rather than errors. Invalid property values (wrong format) are silently discarded,
/// matching the behaviour of the reference Elisp client.
public struct OrgSocialParser: Sendable {

    public init() {}

    // MARK: - Public API

    /// Parses the raw UTF-8 content of a `social.org` file.
    ///
    /// - Parameter content: Full text of the file.
    /// - Returns: A populated `OrgSocialProfile`. `feedURL` is always `nil` here;
    ///   set it on the result if you know the source URL.
    public func parse(_ content: String) -> OrgSocialProfile {
        // Split on any newline character. `"\r\n"` is a single `Character`, so CRLF
        // files yield clean lines instead of lines with a trailing `\r` (which
        // `.whitespaces` trimming does not remove and which would make the
        // `* Posts` comparison fail).
        let lines = content
            .split(omittingEmptySubsequences: false, whereSeparator: \.isNewline)
            .map(String.init)

        let headers = parseHeaders(lines: lines)
        let posts = parsePosts(lines: lines)

        return OrgSocialProfile(
            title: headers["TITLE"]?.first,
            nick: headers["NICK"]?.first,
            description: headers["DESCRIPTION"]?.first,
            avatar: headers["AVATAR"]?.first.flatMap { URL(string: $0) },
            links: headers["LINK"]?.compactMap { URL(string: $0) } ?? [],
            location: headers["LOCATION"]?.first,
            birthday: headers["BIRTHDAY"]?.first,
            // `split` already omits empty pieces, so no extra filtering is needed.
            languages: headers["LANGUAGE"]?.first
                .map { $0.split(separator: " ").map(String.init) } ?? [],
            feedURL: nil,
            pinned: headers["PINNED"]?.first,
            follows: headers["FOLLOW"]?.compactMap { parseFollow($0) } ?? [],
            groups: headers["GROUP"]?.compactMap { parseGroup($0) } ?? [],
            contacts: headers["CONTACT"] ?? [],
            posts: posts
        )
    }

    // MARK: - Header parsing

    /// Extracts all `#+KEYWORD: value` entries from the header section.
    /// Stops at `* Posts` or any other top-level heading.
    /// Ignores lines inside `#+BEGIN_SRC`, `#+BEGIN_EXAMPLE`, and `#+BEGIN_QUOTE` blocks.
    ///
    /// - Parameter lines: All lines of the file.
    /// - Returns: Keyword → values in file order; a repeated keyword accumulates values.
    private func parseHeaders(lines: [String]) -> [String: [String]] {
        var result: [String: [String]] = [:]
        var inBlock = false

        for line in lines {
            let t = line.trimmingCharacters(in: .whitespaces)
            let u = t.uppercased()

            if u.hasPrefix("#+BEGIN_SRC") || u.hasPrefix("#+BEGIN_EXAMPLE") || u.hasPrefix("#+BEGIN_QUOTE") {
                inBlock = true
                continue
            }
            if u.hasPrefix("#+END_SRC") || u.hasPrefix("#+END_EXAMPLE") || u.hasPrefix("#+END_QUOTE") {
                inBlock = false
                continue
            }
            if inBlock { continue }

            // Stop when we hit any top-level heading
            if isTopLevelHeading(t) { break }

            guard t.hasPrefix("#+"), let colonIdx = t.firstIndex(of: ":") else { continue }

            // The "#+" prefix guarantees the colon sits at offset >= 2, so the range is valid.
            // NOTE(review): everything from here to the end of this method was lost in the
            // reviewed copy and has been reconstructed. Upper-casing the keyword and
            // skipping empty values are assumptions — confirm against the original.
            let keyword = String(t[t.index(t.startIndex, offsetBy: 2)..<colonIdx]).uppercased()
            let value = String(t[t.index(after: colonIdx)...]).trimmingCharacters(in: .whitespaces)
            guard !keyword.isEmpty, !value.isEmpty else { continue }

            result[keyword, default: []].append(value)
        }

        return result
    }

    // MARK: - Follow / Group parsing

    /// Parses a `#+FOLLOW:` value, either `https://feed.url` or `nick https://feed.url`.
    ///
    /// - Returns: `nil` when no HTTP(S) URL is found in the first or second token.
    private func parseFollow(_ line: String) -> OrgSocialFollow? {
        let parts = line.split(separator: " ", omittingEmptySubsequences: true).map(String.init)
        guard let first = parts.first else { return nil }

        if isHTTP(first) {
            // Format: "https://feed.url"
            return URL(string: first).map { OrgSocialFollow(name: nil, url: $0) }
        }
        if parts.count >= 2, isHTTP(parts[1]), let url = URL(string: parts[1]) {
            // Format: "nick https://feed.url"
            return OrgSocialFollow(name: first, url: url)
        }
        return nil
    }

    /// Parses a `#+GROUP:` value of the form `<name> <url>`, where the URL is the text
    /// after the last space and the name is everything before it.
    private func parseGroup(_ line: String) -> OrgSocialGroup? {
        let t = line.trimmingCharacters(in: .whitespaces)
        guard let lastSpace = t.lastIndex(of: " ") else { return nil }

        let urlStr = String(t[t.index(after: lastSpace)...])
        let name = String(t[..<lastSpace]).trimmingCharacters(in: .whitespaces)

        // NOTE(review): the tail of this method was lost in the reviewed copy. The
        // validation below and the `OrgSocialGroup(name:url:)` labels are assumed by
        // analogy with `OrgSocialFollow(name:url:)` — confirm against the model type.
        guard !name.isEmpty, isHTTP(urlStr), let url = URL(string: urlStr) else { return nil }
        return OrgSocialGroup(name: name, url: url)
    }

    // MARK: - Post parsing

    /// Collects every post found after the `* Posts` heading.
    ///
    /// - Parameter lines: All lines of the file.
    /// - Returns: The posts that `parsePost` accepts, in file order; empty when there is
    ///   no `* Posts` heading.
    private func parsePosts(lines: [String]) -> [OrgSocialPost] {
        // Find the "* Posts" section
        guard let postsIdx = lines.firstIndex(where: {
            $0.trimmingCharacters(in: .whitespaces) == "* Posts"
        }) else { return [] }

        // Everything after `* Posts` is the posts section. Org Social spec
        // defines no further top-level sections, and users are free to use
        // `* heading` lines inside post bodies — so we must NOT treat them
        // as section boundaries. Breaking on the first top-level heading
        // silently dropped every post that followed a body containing one.
        let sectionLines = lines[(postsIdx + 1)..<lines.count]

        // NOTE(review): the chunking below was lost in the reviewed copy and has been
        // reconstructed: each post heading starts a new chunk, and lines before the
        // first heading are ignored. Confirm against the original.
        var chunks: [[String]] = []
        var current: [String] = []
        for line in sectionLines {
            if isPostHeading(line) {
                if !current.isEmpty { chunks.append(current) }
                current = [line]
            } else if !current.isEmpty {
                current.append(line)
            }
        }
        if !current.isEmpty { chunks.append(current) }

        return chunks.compactMap { parsePost(lines: $0) }
    }

    /// Parses one post from its lines (heading line first).
    ///
    /// - Returns: `nil` when the chunk is empty, no valid ID timestamp is present, the
    ///   timestamp cannot be converted to a date, or the date lies in the future.
    private func parsePost(lines: [String]) -> OrgSocialPost? {
        guard !lines.isEmpty else { return nil }

        // --- ID from header line ---
        let headerRest = lines[0].hasPrefix("** ")
            ? String(lines[0].dropFirst(3)).trimmingCharacters(in: .whitespaces)
            : ""
        let idFromHeader = extractLeadingTimestamp(from: headerRest)

        // --- Properties block ---
        var properties: [String: String] = [:]
        var idFromProperties: String? = nil
        var contentStart = 1 // default if no :PROPERTIES: block
        var inProps = false

        for index in lines.indices.dropFirst() {
            let t = lines[index].trimmingCharacters(in: .whitespaces)

            if t == ":PROPERTIES:" {
                inProps = true
                continue
            }
            guard inProps else { continue }

            if t == ":END:" {
                // Content starts on the line after the drawer. Stop scanning here: a
                // later `:END:` or `:PROPERTIES:` in the body must not move the content
                // start (which discarded the text before it) or overwrite properties.
                contentStart = index + 1
                break
            }
            if let prop = parsePropLine(t) {
                if prop.key == "ID" {
                    idFromProperties = isValidTimestamp(prop.value) ? prop.value : nil
                } else {
                    properties[prop.key] = prop.value
                }
            }
        }

        // Header ID takes priority over property ID (spec v1.6)
        guard let timestamp = idFromHeader ?? idFromProperties else { return nil }
        guard let date = parseDate(timestamp) else { return nil }

        // Skip scheduled posts (future timestamps)
        guard date <= Date() else { return nil }

        // --- Text content ---
        let rawLines = contentStart < lines.count ? Array(lines[contentStart...]) : []
        let text = extractText(from: rawLines)

        // --- Validated properties ---
        return OrgSocialPost(
            timestamp: timestamp,
            date: date,
            text: text,
            lang: validateLang(properties["LANG"]),
            tags: parseTags(properties["TAGS"]),
            client: validateShortText(properties["CLIENT"]),
            replyTo: validateURLTimestamp(properties["REPLY_TO"]),
            include: validateURLTimestamp(properties["INCLUDE"]),
            pollEnd: properties["POLL_END"].flatMap { parseDate($0) },
            pollOption: validateShortText(properties["POLL_OPTION"]),
            group: validateGroupProp(properties["GROUP"]),
            mood: validateShortText(properties["MOOD"]),
            migration: properties["MIGRATION"],
            visibility: validateVisibility(properties["VISIBILITY"])
        )
    }

    /// Parses a single `:KEY: value` line from inside a `:PROPERTIES:` block.
    private func parsePropLine(_ trimmed: String) -> (key: String, value: String)? {
        guard trimmed.hasPrefix(":"),
              trimmed != ":PROPERTIES:",
              trimmed != ":END:" else { return nil }

        let withoutFirst = String(trimmed.dropFirst(1))
        guard let secondColon = withoutFirst.firstIndex(of: ":") else { return nil }

        // NOTE(review): the tail of this method was lost in the reviewed copy.
        // Upper-casing the key and rejecting empty keys/values are assumptions —
        // confirm against the original.
        let key = String(withoutFirst[..<secondColon]).uppercased()
        let value = String(withoutFirst[withoutFirst.index(after: secondColon)...])
            .trimmingCharacters(in: .whitespaces)
        guard !key.isEmpty, !value.isEmpty else { return nil }

        return (key: key, value: value)
    }

    /// Builds the post text from its body lines: lines that start with `#` or `:`
    /// (after trimming spaces/tabs) are dropped, the rest are joined with `\n`, and
    /// surrounding whitespace and newlines are trimmed.
    private func extractText(from lines: [String]) -> String {
        lines
            .filter { line in
                let t = line.trimmingCharacters(in: .whitespaces)
                return !t.hasPrefix("#") && !t.hasPrefix(":")
            }
            .joined(separator: "\n")
            .trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - Property validation

    /// Accepts a lowercase language tag such as `en` or `pt-br`; anything else is `nil`.
    private func validateLang(_ s: String?) -> String? {
        guard let s else { return nil }
        let ok = s.range(of: #"^[a-z]{2,5}(-[a-z]{2,3})?$"#, options: .regularExpression) != nil
        return ok ? s : nil
    }

    /// Splits a whitespace-separated tag list. Returns `[]` when the value is missing
    /// or contains characters other than letters, digits, `_` and `-`.
    private func parseTags(_ s: String?) -> [String] {
        guard let s else { return [] }
        let ok = s.range(
            of: #"^[a-zA-Z0-9_-]+(\s+[a-zA-Z0-9_-]+)*$"#,
            options: .regularExpression
        ) != nil
        guard ok else { return [] }
        return s.split(separator: " ").map(String.init)
    }

    /// Accepts a single-line value shorter than 200 characters.
    private func validateShortText(_ s: String?) -> String? {
        guard let s, s.count < 200, !s.contains("\n"), !s.contains("\r") else { return nil }
        return s
    }

    /// Accepts a `<http(s) url>#<timestamp>` reference; anything else is `nil`.
    ///
    /// The tail after the offset hours is deliberately left unanchored, as before.
    /// `Z` is accepted as a zone designator so that references to posts whose ID
    /// passes `isValidTimestamp` with a `Z` suffix are no longer discarded.
    private func validateURLTimestamp(_ s: String?) -> String? {
        guard let s else { return nil }
        let ok = s.range(
            of: #"^https?://.+#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}|Z)"#,
            options: .regularExpression
        ) != nil
        return ok ? s : nil
    }

    /// Accepts a post-level GROUP value with at least two space-separated tokens whose
    /// last token is an HTTP(S) URL. The value is returned unchanged.
    private func validateGroupProp(_ s: String?) -> String? {
        guard let s else { return nil }
        let parts = s.split(separator: " ", omittingEmptySubsequences: true)
        guard parts.count >= 2, let last = parts.last, isHTTP(String(last)) else { return nil }
        return s
    }

    /// Accepts only the literal values `public` and `mention`.
    private func validateVisibility(_ s: String?) -> String? {
        guard let s else { return nil }
        return (s == "public" || s == "mention") ? s : nil
    }

    // MARK: - Date / timestamp utilities

    private func parseDate(_ timestamp: String) -> Date? {
        // Delegates to the shared PostWriter helper so emission and parsing
        // stay in sync (handles compact `+0200`, colon `+02:00`, and `Z`).
        PostWriter.parseTimestamp(timestamp)
    }

    /// Returns `true` if the string is a valid RFC 3339 post timestamp.
    private func isValidTimestamp(_ s: String) -> Bool {
        s.range(
            of: #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:?\d{2}|Z)$"#,
            options: .regularExpression
        ) != nil
    }

    /// Extracts a leading RFC 3339 timestamp from the start of a string.
    /// Used when the ID is embedded in the `** ` heading.
    private func extractLeadingTimestamp(from s: String) -> String? {
        guard let range = s.range(
            of: #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:?\d{2}|Z)"#,
            options: .regularExpression
        ) else { return nil }
        return String(s[range])
    }

    // MARK: - Line classification helpers

    /// Returns `true` for a top-level heading (`* ` but NOT `** `).
    private func isTopLevelHeading(_ line: String) -> Bool {
        // A line starting with "* " can never also start with "** ", so the single
        // prefix check is equivalent to the two-step test.
        line.hasPrefix("* ")
    }

    /// Returns `true` for a level-2 heading (`** ...` but NOT `*** ...`).
    /// A bare `**` counts as a heading; no space after the stars is required.
    private func isPostHeading(_ line: String) -> Bool {
        guard line.hasPrefix("**") else { return false }
        // `first` is nil for a bare "**", which is also accepted.
        return line.dropFirst(2).first != "*"
    }

    /// Returns `true` when the string starts with `http://` or `https://`.
    private func isHTTP(_ s: String) -> Bool {
        s.hasPrefix("http://") || s.hasPrefix("https://")
    }
}