b15dac53f1
OrgSocialParser.parsePosts() collected lines after `* Posts` until the next top-level heading, then stopped. That assumption broke the moment a user wrote an Org heading inside a post body — one such post on 2025-10-14 hid every post that followed it (six months of content) until the same user tried to see their own freshly-published post and noticed it was missing. Fix: read from `* Posts` to end of file. Org Social spec defines no further top-level sections, and `* foo` inside a body is body content. Regression test covers this case. Also make the own profile always take precedence when the TimelineFetcher merges feeds — the caller's bypassCache copy beats any relay-wide download that may be stale.
317 lines
13 KiB
Swift
317 lines
13 KiB
Swift
import Foundation
|
|
|
|
/// Parses the raw text of a `social.org` file into an `OrgSocialProfile`.
|
|
///
|
|
/// The parser is lenient: missing required fields (TITLE, NICK) produce `nil` values
|
|
/// rather than errors. Invalid property values (wrong format) are silently discarded,
|
|
/// matching the behaviour of the reference Elisp client.
|
|
public struct OrgSocialParser: Sendable {
|
|
|
|
/// Creates a parser. The struct declares no stored properties, so there is nothing to configure.
public init() {}
|
|
|
|
// MARK: - Public API
|
|
|
|
/// Parses the raw UTF-8 content of a `social.org` file.
///
/// Windows-style `\r\n` line endings are normalised to `\n` before the text is split
/// into lines. Without that step every line would keep a trailing `\r`, which
/// `CharacterSet.whitespaces` does not strip, so exact-match markers such as
/// `* Posts`, `:PROPERTIES:` and `:END:` would never be recognised.
///
/// - Parameter content: Full text of the file.
/// - Returns: A populated `OrgSocialProfile`. `feedURL` is always `nil` here;
///   set it on the result if you know the source URL.
public func parse(_ content: String) -> OrgSocialProfile {
    let normalized = content.replacingOccurrences(of: "\r\n", with: "\n")
    let lines = normalized.components(separatedBy: "\n")
    let headers = parseHeaders(lines: lines)
    let posts = parsePosts(lines: lines)

    return OrgSocialProfile(
        title: headers["TITLE"]?.first,
        nick: headers["NICK"]?.first,
        description: headers["DESCRIPTION"]?.first,
        avatar: headers["AVATAR"]?.first.flatMap { URL(string: $0) },
        links: headers["LINK"]?.compactMap { URL(string: $0) } ?? [],
        location: headers["LOCATION"]?.first,
        birthday: headers["BIRTHDAY"]?.first,
        // `split` already omits empty subsequences, so no extra filtering is needed.
        languages: headers["LANGUAGE"]?.first
            .map { $0.split(separator: " ").map(String.init) } ?? [],
        feedURL: nil,
        pinned: headers["PINNED"]?.first,
        follows: headers["FOLLOW"]?.compactMap { parseFollow($0) } ?? [],
        groups: headers["GROUP"]?.compactMap { parseGroup($0) } ?? [],
        contacts: headers["CONTACT"] ?? [],
        posts: posts
    )
}
|
|
|
|
// MARK: - Header parsing
|
|
|
|
/// Collects every `#+KEYWORD: value` entry that appears before the first top-level heading.
///
/// Keywords are upper-cased and may repeat; values are stored in file order.
/// Lines between `#+BEGIN_SRC` / `#+BEGIN_EXAMPLE` / `#+BEGIN_QUOTE` and the
/// matching `#+END_…` marker are skipped, as are entries with an empty value or a
/// keyword containing anything other than letters and underscores.
///
/// - Parameter lines: The file split into lines.
/// - Returns: A dictionary from upper-cased keyword to its values.
private func parseHeaders(lines: [String]) -> [String: [String]] {
    let blockOpeners = ["#+BEGIN_SRC", "#+BEGIN_EXAMPLE", "#+BEGIN_QUOTE"]
    let blockClosers = ["#+END_SRC", "#+END_EXAMPLE", "#+END_QUOTE"]

    var keywords: [String: [String]] = [:]
    var insideBlock = false

    for rawLine in lines {
        let trimmed = rawLine.trimmingCharacters(in: .whitespaces)
        let upper = trimmed.uppercased()

        // Block markers are matched case-insensitively and toggle the skip state.
        if blockOpeners.contains(where: { upper.hasPrefix($0) }) {
            insideBlock = true
            continue
        }
        if blockClosers.contains(where: { upper.hasPrefix($0) }) {
            insideBlock = false
            continue
        }
        if insideBlock {
            continue
        }

        // The header section ends at the first top-level heading.
        if isTopLevelHeading(trimmed) {
            break
        }

        guard trimmed.hasPrefix("#+"), let colon = trimmed.firstIndex(of: ":") else {
            continue
        }

        // Everything between the leading "#+" and the first colon is the keyword.
        let name = trimmed[..<colon].dropFirst(2).uppercased()
        guard !name.isEmpty, name.allSatisfy({ $0.isLetter || $0 == "_" }) else {
            continue
        }

        let payload = trimmed[trimmed.index(after: colon)...].trimmingCharacters(in: .whitespaces)
        guard !payload.isEmpty else {
            continue
        }
        keywords[name, default: []].append(payload)
    }

    return keywords
}
|
|
|
|
// MARK: - Follow / Group
|
|
|
|
/// Parses the value of a `#+FOLLOW:` header.
///
/// Two layouts are accepted: a bare feed URL, or a nickname followed by a feed URL.
/// Anything after the URL is ignored.
///
/// - Parameter line: The header value.
/// - Returns: The follow entry, or `nil` when no `http(s)` URL is found in the
///   first or second position.
private func parseFollow(_ line: String) -> OrgSocialFollow? {
    let tokens = line.split(separator: " ").map(String.init)
    guard let first = tokens.first else { return nil }

    // Layout: "https://feed.url"
    if isHTTP(first) {
        guard let feed = URL(string: first) else { return nil }
        return OrgSocialFollow(name: nil, url: feed)
    }

    // Layout: "nick https://feed.url"
    guard tokens.count >= 2, isHTTP(tokens[1]), let feed = URL(string: tokens[1]) else {
        return nil
    }
    return OrgSocialFollow(name: first, url: feed)
}
|
|
|
|
/// Parses the value of a `#+GROUP:` header of the form `<name> <relay-url>`.
///
/// The text after the last space is taken as the relay URL; everything before it
/// (which may itself contain spaces) is the group name.
///
/// - Parameter line: The header value.
/// - Returns: The group, or `nil` when there is no space, the name is empty, or
///   the trailing token is not an `http(s)` URL.
private func parseGroup(_ line: String) -> OrgSocialGroup? {
    let trimmed = line.trimmingCharacters(in: .whitespaces)
    guard let separator = trimmed.lastIndex(of: " ") else { return nil }

    let label = trimmed[..<separator].trimmingCharacters(in: .whitespaces)
    let relay = String(trimmed[trimmed.index(after: separator)...])

    guard !label.isEmpty, isHTTP(relay), let relayURL = URL(string: relay) else {
        return nil
    }
    return OrgSocialGroup(name: label, relayURL: relayURL)
}
|
|
|
|
// MARK: - Post parsing
|
|
|
|
/// Splits the `* Posts` section into per-post blocks and parses each one.
///
/// - Parameter lines: The file split into lines.
/// - Returns: Every block that `parsePostBlock` accepts, in file order; empty when
///   the file has no `* Posts` line.
private func parsePosts(lines: [String]) -> [OrgSocialPost] {
    guard let marker = lines.firstIndex(where: { $0.trimmingCharacters(in: .whitespaces) == "* Posts" }) else {
        return []
    }

    // The section runs from the line after `* Posts` to the end of the file.
    // The Org Social spec defines no further top-level sections, and authors may
    // write `* heading` lines inside a post body, so a top-level heading must NOT
    // end the section: stopping there silently dropped every later post.
    var blocks: [[String]] = []
    for line in lines[(marker + 1)...] {
        if blocks.isEmpty || isPostHeading(line) {
            // A level-2 heading opens a new block; so does the very first line,
            // which keeps any text that precedes the first heading together.
            blocks.append([line])
        } else {
            blocks[blocks.count - 1].append(line)
        }
    }

    return blocks.compactMap { parsePostBlock($0) }
}
|
|
|
|
/// Parses a single post block (array of lines starting with the `** ` heading).
///
/// The post ID is taken from a timestamp at the start of the heading or, failing
/// that, from a valid `:ID:` property. Only the first drawer is read: scanning
/// stops at the first `:END:` line, so a later `:PROPERTIES:` or `:END:` line in
/// the body can neither overwrite the metadata nor push the start of the text
/// past real content.
///
/// - Parameter lines: The lines of one block; the first is expected to be the heading.
/// - Returns: The post, or `nil` when the block is empty, has no usable timestamp,
///   the timestamp cannot be parsed into a date, or the date is in the future.
private func parsePostBlock(_ lines: [String]) -> OrgSocialPost? {
    guard let heading = lines.first else { return nil }

    // --- ID from header line ---
    let headerRest = heading.hasPrefix("** ")
        ? String(heading.dropFirst(3)).trimmingCharacters(in: .whitespaces)
        : ""
    let idFromHeader = extractLeadingTimestamp(from: headerRest)

    // --- Properties block ---
    var properties: [String: String] = [:]
    var idFromProperties: String?
    var contentStart = 1 // default if no drawer terminator is found
    var inProps = false

    for (offset, line) in lines.dropFirst().enumerated() {
        let t = line.trimmingCharacters(in: .whitespaces)
        if t == ":PROPERTIES:" {
            inProps = true
            continue
        }
        if t == ":END:" {
            // `offset` is 0-based within the lines after the heading, so this line
            // sits at index offset + 1 and the text starts at offset + 2.
            contentStart = offset + 2
            // Everything after the first terminator is body text.
            break
        }
        if inProps, let (key, val) = parsePropLine(t) {
            if key == "ID" {
                idFromProperties = isValidTimestamp(val) ? val : nil
            } else {
                properties[key] = val
            }
        }
    }

    // Header ID takes priority over property ID (spec v1.6)
    guard let timestamp = idFromHeader ?? idFromProperties else { return nil }
    guard let date = parseDate(timestamp) else { return nil }

    // Skip scheduled posts (future timestamps)
    guard date <= Date() else { return nil }

    // --- Text content ---
    let rawLines = contentStart < lines.count ? Array(lines[contentStart...]) : []
    let text = extractText(from: rawLines)

    // --- Validated properties ---
    return OrgSocialPost(
        timestamp: timestamp,
        date: date,
        text: text,
        lang: validateLang(properties["LANG"]),
        tags: parseTags(properties["TAGS"]),
        client: validateShortText(properties["CLIENT"]),
        replyTo: validateURLTimestamp(properties["REPLY_TO"]),
        include: validateURLTimestamp(properties["INCLUDE"]),
        pollEnd: properties["POLL_END"].flatMap { parseDate($0) },
        pollOption: validateShortText(properties["POLL_OPTION"]),
        group: validateGroupProp(properties["GROUP"]),
        mood: validateShortText(properties["MOOD"]),
        migration: properties["MIGRATION"],
        visibility: validateVisibility(properties["VISIBILITY"])
    )
}
|
|
|
|
/// Splits one `:KEY: value` drawer line into its key and value.
///
/// - Parameter trimmed: The line with surrounding whitespace already removed.
/// - Returns: The upper-cased key and the whitespace-trimmed value, or `nil` when
///   the line does not start with `:`, is a `:PROPERTIES:` / `:END:` marker, has
///   no second colon, or has an empty key or value.
private func parsePropLine(_ trimmed: String) -> (key: String, value: String)? {
    if !trimmed.hasPrefix(":") || trimmed == ":PROPERTIES:" || trimmed == ":END:" {
        return nil
    }

    // Drop the opening colon, then look for the colon that closes the key.
    let body = trimmed.dropFirst()
    guard let closing = body.firstIndex(of: ":") else { return nil }

    let name = body[..<closing].uppercased()
    let payload = body[body.index(after: closing)...].trimmingCharacters(in: .whitespaces)

    if name.isEmpty || payload.isEmpty {
        return nil
    }
    return (key: name, value: payload)
}
|
|
|
|
/// Builds the post text from the lines that follow the properties drawer.
///
/// Lines whose first non-blank character is `#` or `:` are dropped (the existing
/// note says this mirrors the reference Elisp filter rules). The remaining lines
/// are joined with newlines and the result is trimmed of surrounding whitespace
/// and newlines.
///
/// - Parameter lines: Raw body lines of a post.
/// - Returns: The cleaned-up text, possibly empty.
private func extractText(from lines: [String]) -> String {
    var kept: [String] = []
    for line in lines {
        let trimmed = line.trimmingCharacters(in: .whitespaces)
        if trimmed.hasPrefix("#") || trimmed.hasPrefix(":") {
            continue
        }
        // Keep the original line so inner indentation survives.
        kept.append(line)
    }
    let joined = kept.joined(separator: "\n")
    return joined.trimmingCharacters(in: .whitespacesAndNewlines)
}
|
|
|
|
// MARK: - Property validation
|
|
|
|
/// Validates a `:LANG:` property value.
///
/// - Parameter s: The raw value, if present.
/// - Returns: `s` unchanged when it is 2–5 lowercase ASCII letters optionally
///   followed by `-` and 2–3 more lowercase letters; otherwise `nil`.
private func validateLang(_ s: String?) -> String? {
    let pattern = #"^[a-z]{2,5}(-[a-z]{2,3})?$"#
    return s.flatMap { candidate in
        candidate.range(of: pattern, options: .regularExpression) == nil ? nil : candidate
    }
}
|
|
|
|
/// Parses a `:TAGS:` property value into individual tags.
///
/// The value must consist solely of tags made of ASCII letters, digits, `_` and
/// `-`, separated by whitespace. The validation pattern accepts any whitespace
/// run (`\s+`) as a separator, so the split uses whitespace too; splitting on a
/// literal space alone would return a tab-separated list as one tag.
///
/// - Parameter s: The raw value, if present.
/// - Returns: The tags in order, or an empty array when the value is missing or malformed.
private func parseTags(_ s: String?) -> [String] {
    guard let s else { return [] }
    let isWellFormed = s.range(
        of: #"^[a-zA-Z0-9_-]+(\s+[a-zA-Z0-9_-]+)*$"#,
        options: .regularExpression
    ) != nil
    guard isWellFormed else { return [] }
    return s.split(whereSeparator: \.isWhitespace).map(String.init)
}
|
|
|
|
/// Validates a short single-line property value.
///
/// - Parameter s: The raw value, if present.
/// - Returns: `s` unchanged when it has fewer than 200 characters and contains no
///   line-feed or carriage-return; otherwise `nil`.
private func validateShortText(_ s: String?) -> String? {
    guard let s else { return nil }
    if s.count >= 200 {
        return nil
    }
    if s.contains("\n") || s.contains("\r") {
        return nil
    }
    return s
}
|
|
|
|
/// Validates a post reference of the form `<feed-url>#<timestamp>`.
///
/// The value must start with `http://` or `https://` and contain a `#` followed
/// by `YYYY-MM-DDTHH:MM:SS` and a time-zone designator. The designator may be a
/// numeric offset (`+02:00`, `+0200`) or `Z`, matching what `isValidTimestamp`
/// accepts for post IDs; without the `Z` alternative, a reference to a
/// `Z`-stamped post would be discarded.
///
/// - Parameter s: The raw value, if present.
/// - Returns: `s` unchanged when it matches; otherwise `nil`.
private func validateURLTimestamp(_ s: String?) -> String? {
    guard let s else { return nil }
    let ok = s.range(
        of: #"^https?://.+#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}|Z)"#,
        options: .regularExpression
    ) != nil
    return ok ? s : nil
}
|
|
|
|
/// Validates a `:GROUP:` property value of the form `<name> <relay-url>`.
///
/// - Parameter s: The raw value, if present.
/// - Returns: `s` unchanged when it has at least two space-separated tokens and
///   the last one starts with `http://` or `https://`; otherwise `nil`.
private func validateGroupProp(_ s: String?) -> String? {
    guard let s else { return nil }
    let tokens = s.split(separator: " ")
    guard tokens.count >= 2, let relay = tokens.last, isHTTP(String(relay)) else {
        return nil
    }
    return s
}
|
|
|
|
/// Validates a `:VISIBILITY:` property value.
///
/// - Parameter s: The raw value, if present.
/// - Returns: `s` when it is exactly `public` or `mention`; otherwise `nil`.
private func validateVisibility(_ s: String?) -> String? {
    switch s {
    case .some("public"), .some("mention"):
        return s
    default:
        return nil
    }
}
|
|
|
|
// MARK: - Date / timestamp utilities
|
|
|
|
/// Converts a post timestamp string into a `Date`.
///
/// - Parameter timestamp: The timestamp text.
/// - Returns: Whatever `PostWriter.parseTimestamp` returns for it.
private func parseDate(_ timestamp: String) -> Date? {
    // Forwarded to the shared PostWriter helper so emission and parsing stay in
    // sync. Per the existing note, that helper handles compact `+0200`, colon
    // `+02:00`, and `Z` offsets.
    return PostWriter.parseTimestamp(timestamp)
}
|
|
|
|
/// Reports whether the whole string has the shape of an RFC 3339 post timestamp.
///
/// Accepted shape: `YYYY-MM-DDTHH:MM:SS` followed by `Z` or a numeric offset,
/// with or without a colon (`+02:00` / `+0200`). Only the digit layout is
/// checked, not whether the fields form a real date.
///
/// - Parameter s: The candidate string.
/// - Returns: `true` when `s` matches the pattern.
private func isValidTimestamp(_ s: String) -> Bool {
    let pattern = #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:?\d{2}|Z)$"#
    let match = s.range(of: pattern, options: .regularExpression)
    return match != nil
}
|
|
|
|
/// Pulls an RFC 3339 timestamp off the front of a string.
///
/// Used when the post ID is embedded in the `** ` heading. The timestamp must
/// begin at the first character; any text after it is ignored.
///
/// - Parameter s: Heading text with the leading stars already removed.
/// - Returns: The matched timestamp, or `nil` when `s` does not start with one.
private func extractLeadingTimestamp(from s: String) -> String? {
    let pattern = #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:?\d{2}|Z)"#
    return s.range(of: pattern, options: .regularExpression).map { String(s[$0]) }
}
|
|
|
|
// MARK: - Line classification helpers
|
|
|
|
/// Returns `true` for a top-level heading: a line that starts with a single `*`
/// followed by a space.
///
/// A level-2 heading (`** `) can never satisfy this, because its second character
/// is `*` rather than a space, so no separate exclusion is needed.
///
/// - Parameter line: The line to classify.
/// - Returns: `true` when `line` begins with `"* "`.
private func isTopLevelHeading(_ line: String) -> Bool {
    line.hasPrefix("* ")
}
|
|
|
|
/// Reports whether a line opens a post: it starts with exactly two stars.
///
/// A bare `**` counts, as does `**` followed by any character other than a third
/// `*` (so `*** ...` is rejected).
///
/// - Parameter line: The untrimmed line.
/// - Returns: `true` when the line begins with `**` and its third character, if
///   any, is not `*`.
private func isPostHeading(_ line: String) -> Bool {
    guard line.hasPrefix("**") else { return false }
    // `first` is nil for a bare "**", which also compares unequal to "*".
    return line.dropFirst(2).first != "*"
}
|
|
|
|
/// Reports whether `s` begins with the `http://` or `https://` scheme prefix.
/// The comparison is case-sensitive.
private func isHTTP(_ s: String) -> Bool {
    for scheme in ["http://", "https://"] where s.hasPrefix(scheme) {
        return true
    }
    return false
}
|
|
}
|