mirror of
https://github.com/tanrax/org-social.el
synced 2026-01-07 05:43:32 +01:00
302 lines
14 KiB
EmacsLisp
302 lines
14 KiB
EmacsLisp
;;; org-social-parser.el --- Parser functions for Org-social -*- lexical-binding: t -*- -*- coding: utf-8 -*-
|
|
|
|
;; SPDX-License-Identifier: GPL-3.0
|
|
|
|
;; Author: Andros Fenollosa <hi@andros.dev>
|
|
;; Version: 2.10
|
|
;; URL: https://github.com/tanrax/org-social.el
|
|
|
|
;; This file is NOT part of GNU Emacs.
|
|
|
|
;; This program is free software; you can redistribute it and/or
|
|
;; modify it under the terms of the GNU General Public License as
|
|
;; published by the Free Software Foundation, either version 3 of the
|
|
;; License, or (at your option) any later version.
|
|
|
|
;; This program is distributed in the hope that it will be useful, but
|
|
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;; General Public License for more details.
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with this program. If not, see
|
|
;; <http://www.gnu.org/licenses/>.
|
|
|
|
;;; Commentary:
|
|
|
|
;; Functions for parsing Org-social feeds and extracting data.
|
|
|
|
;;; Code:
|
|
|
|
(require 'org-social-variables)
|
|
|
|
;; Forward declarations
|
|
(declare-function org-social--format-date "org-social" (timestamp))
|
|
|
|
(defun org-social-parser--generate-timestamp ()
|
|
"Generate a timestamp in RFC 3339 format for use as post ID.
|
|
Follows Org Social specification format."
|
|
(let ((timestamp (format-time-string "%FT%T%z")))
|
|
;; Ensure proper timezone format (+##:## instead of +####)
|
|
(if (string-match "\\(.*[+-][0-9]\{2\}\\)\\([0-9]\{2\}\\)$" timestamp)
|
|
(format "%s:%s" (match-string 1 timestamp) (match-string 2 timestamp))
|
|
timestamp)))
|
|
|
|
(defun org-social-parser--format-timestamp (timestamp)
|
|
"Format TIMESTAMP to human-readable date format: '24 Sep 2025, 15:30'.
|
|
TIMESTAMP should be in RFC 3339 format or a time value."
|
|
(org-social--format-date timestamp))
|
|
|
|
(defun org-social-parser--get-value (feed key)
|
|
"Extract values from an Org-social FEED based on KEY.
|
|
Ignores lines inside source blocks (#+BEGIN_SRC...#+END_SRC)."
|
|
(when (and (stringp feed) (stringp key))
|
|
(let* ((lines (split-string feed "\n"))
|
|
(regex (concat "^#\\+" (regexp-quote key) ":\\s-*\\(.+\\)$"))
|
|
(in-src-block nil)
|
|
values)
|
|
(dolist (line lines)
|
|
;; Check if we're entering or leaving a source block
|
|
(cond
|
|
((string-match-p "^#\\+BEGIN_SRC\\|^#\\+BEGIN_EXAMPLE\\|^#\\+BEGIN_QUOTE" line)
|
|
(setq in-src-block t))
|
|
((string-match-p "^#\\+END_SRC\\|^#\\+END_EXAMPLE\\|^#\\+END_QUOTE" line)
|
|
(setq in-src-block nil))
|
|
;; Only match the regex if we're not in a source block
|
|
((and (not in-src-block) (string-match regex line))
|
|
(let ((value (string-trim (match-string 1 line))))
|
|
(setq values (cons value values))))))
|
|
(if values
|
|
(if (= (length values) 1)
|
|
(car values)
|
|
(reverse values))
|
|
nil))))
|
|
|
|
(defun org-social-parser--parse-follow (follow-line)
|
|
"Parse a FOLLOW line into name and URL components.
|
|
Argument FOLLOW-LINE text."
|
|
(when follow-line
|
|
(let ((parts (split-string follow-line " " t)))
|
|
(if (>= (length parts) 2)
|
|
(list (cons 'name (car parts))
|
|
(cons 'url (cadr parts)))
|
|
(list (cons 'name nil)
|
|
(cons 'url (car parts)))))))
|
|
|
|
(defun org-social-parser--parse-group (group-line)
|
|
"Parse a GROUP line into name and relay URL components.
|
|
GROUP-LINE should be in format: \\='Group Name https://relay.example.com\\='.
|
|
The group name can contain spaces, and the relay URL is the last
|
|
space-separated token."
|
|
(when group-line
|
|
(let* ((trimmed (string-trim group-line))
|
|
;; Find the last space to separate name from URL
|
|
(last-space-pos (string-match " [^ ]+$" trimmed)))
|
|
(if last-space-pos
|
|
(let ((name (string-trim (substring trimmed 0 last-space-pos)))
|
|
(url (string-trim (substring trimmed (1+ last-space-pos)))))
|
|
(list (cons 'name name)
|
|
(cons 'relay-url url)))
|
|
;; No space found, treat entire line as name with no URL
|
|
(list (cons 'name trimmed)
|
|
(cons 'relay-url nil))))))
|
|
|
|
(defun org-social-parser--get-my-profile ()
|
|
"Get the current user's profile from their Org-social file."
|
|
;; Ensure vfile is downloaded if needed
|
|
(when (and (fboundp 'org-social-file--ensure-vfile-downloaded)
|
|
(fboundp 'org-social-file--is-vfile-p)
|
|
(org-social-file--is-vfile-p org-social-file))
|
|
(org-social-file--ensure-vfile-downloaded))
|
|
|
|
(let ((feed nil)
|
|
(target-file (if (and (fboundp 'org-social-file--is-vfile-p)
|
|
(org-social-file--is-vfile-p org-social-file)
|
|
(fboundp 'org-social-file--get-local-file-path))
|
|
(org-social-file--get-local-file-path org-social-file)
|
|
org-social-file)))
|
|
(when (file-exists-p target-file)
|
|
(with-temp-buffer
|
|
(insert-file-contents target-file)
|
|
;; Process migrations if the function exists
|
|
(when (fboundp 'org-social-file--process-migrations)
|
|
(org-social-file--process-migrations))
|
|
(setq feed (buffer-string)))
|
|
(let* ((follows (org-social-parser--get-value feed "FOLLOW"))
|
|
(follows-list (if (listp follows) follows (list follows)))
|
|
(groups (org-social-parser--get-value feed "GROUP"))
|
|
(groups-list (if (listp groups) groups (list groups))))
|
|
(list
|
|
(cons 'id (gensym))
|
|
(cons 'nick (org-social-parser--get-value feed "NICK"))
|
|
(cons 'title (org-social-parser--get-value feed "TITLE"))
|
|
(cons 'description (org-social-parser--get-value feed "DESCRIPTION"))
|
|
(cons 'avatar (org-social-parser--get-value feed "AVATAR"))
|
|
(cons 'location (org-social-parser--get-value feed "LOCATION"))
|
|
(cons 'birthday (org-social-parser--get-value feed "BIRTHDAY"))
|
|
(cons 'language (org-social-parser--get-value feed "LANGUAGE"))
|
|
(cons 'pinned (org-social-parser--get-value feed "PINNED"))
|
|
(cons 'url (or org-social-my-public-url org-social-file)) ; Use public URL if available, fallback to file path
|
|
(cons 'follow (mapcar #'org-social-parser--parse-follow (delq nil follows-list)))
|
|
(cons 'group (mapcar #'org-social-parser--parse-group (delq nil groups-list)))
|
|
(cons 'posts (org-social-parser--get-posts-from-feed feed)))))))
|
|
|
|
(defun org-social-parser--extract-property (text prop-name)
|
|
"Extract a property value from TEXT according to Org Social specification.
|
|
PROP-NAME should be the property name without colons.
|
|
Validates format according to specification - ignores invalid values."
|
|
(when (and (stringp text)
|
|
(string-match (format ":%s:\\s-*\\([^\n]+\\)" (regexp-quote prop-name)) text))
|
|
(let ((value (string-trim (match-string 1 text))))
|
|
;; Enhanced validation according to Org Social specification
|
|
(when (and (not (string-empty-p value))
|
|
(not (string-match-p "^:END:$" value))
|
|
(not (string-match-p "^#" value))
|
|
(not (string-match-p "^:" value)) ; Property lines
|
|
(org-social-parser--validate-property prop-name value))
|
|
value))))
|
|
|
|
(defun org-social-parser--get-posts-from-feed (feed)
|
|
"Extract posts from an Org-social FEED."
|
|
(if (not (stringp feed))
|
|
nil ; Return empty list if feed is nil or not a string
|
|
(let ((posts nil))
|
|
(with-temp-buffer
|
|
(insert feed)
|
|
(goto-char (point-min))
|
|
(when (re-search-forward "^\\* Posts" nil t)
|
|
(while (re-search-forward "^\\*\\*\\($\\|[^*]\\)" nil t)
|
|
(let ((post-start (point))
|
|
post-end id id-from-header id-from-properties text date properties-text)
|
|
;; Extract ID from header (after **)
|
|
(beginning-of-line)
|
|
(when (looking-at "^\\*\\*\\s-+\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}T[0-9]\\{2\\}:[0-9]\\{2\\}:[0-9]\\{2\\}[+-][0-9]\\{2\\}\\(:[0-9]\\{2\\}\\|[0-9]\\{2\\}\\)\\)")
|
|
(setq id-from-header (match-string 1)))
|
|
(end-of-line)
|
|
(forward-char)
|
|
(setq post-start (point))
|
|
|
|
;; Find the end of this post (next ** or end of buffer)
|
|
(if (re-search-forward "^\\*\\*\\($\\|[^*]\\)" nil t)
|
|
(progn
|
|
(backward-char) ; Move back to the line with **
|
|
(beginning-of-line)
|
|
(setq post-end (point))
|
|
(goto-char post-start))
|
|
(setq post-end (point-max))
|
|
(goto-char post-start))
|
|
|
|
;; Extract the full properties section
|
|
(when (re-search-forward ":PROPERTIES:" post-end t)
|
|
(let ((prop-start (point)))
|
|
(when (re-search-forward ":END:" post-end t)
|
|
(setq properties-text (buffer-substring-no-properties prop-start (point))))))
|
|
|
|
;; Extract ID from properties
|
|
(setq id-from-properties (org-social-parser--extract-property properties-text "ID"))
|
|
|
|
;; Use ID from header if present, otherwise use ID from properties
|
|
;; According to Org Social spec v1.6: if both are present, header takes priority
|
|
(setq id (or id-from-header id-from-properties))
|
|
(when id (setq date (safe-date-to-time id)))
|
|
|
|
;; Extract text content (after :END:)
|
|
(goto-char post-start)
|
|
(when (re-search-forward ":END:" post-end t)
|
|
(forward-line)
|
|
(let ((content-lines '()))
|
|
;; Process each line and filter out comments
|
|
(while (< (point) post-end)
|
|
(let ((line-start (point)))
|
|
(end-of-line)
|
|
(let ((line (buffer-substring-no-properties line-start (point))))
|
|
;; Filter out comments (#), properties (:), and property comments (#:)
|
|
(unless (or (string-match-p "^\\s-*#" line)
|
|
(string-match-p "^\\s-*:" line)
|
|
(string-match-p "^\\s-*#:" line))
|
|
(push line content-lines)))
|
|
(forward-line 1)))
|
|
(setq text (string-trim (string-join (reverse content-lines) "\n")))))
|
|
|
|
;; Add post if we have required data
|
|
;; Properties are optional, so we only require id, text, and date
|
|
(when (and id text date)
|
|
(let ((post-data (list
|
|
(cons 'id (gensym))
|
|
(cons 'timestamp id)
|
|
(cons 'date (float-time date))
|
|
(cons 'text text))))
|
|
|
|
;; Extract only official properties according to Org Social specification
|
|
;; Official properties: LANG, TAGS, CLIENT, REPLY_TO, POLL_END, POLL_OPTION, GROUP, MOOD, INCLUDE, VISIBILITY
|
|
(dolist (prop '("LANG" "TAGS" "CLIENT" "REPLY_TO" "POLL_END"
|
|
"POLL_OPTION" "GROUP" "MOOD" "INCLUDE" "VISIBILITY"))
|
|
(let ((value (org-social-parser--extract-property properties-text prop)))
|
|
(when value
|
|
(setq post-data (cons (cons (intern (downcase prop)) value) post-data)))))
|
|
|
|
(setq posts (cons post-data posts))))
|
|
|
|
;; Move to the post we found (if any)
|
|
(when (< (point) post-end)
|
|
(goto-char post-end))))))
|
|
(reverse posts))))
|
|
|
|
(defun org-social-parser--validate-property (prop-name value)
|
|
"Validate PROP-NAME VALUE according to Org Social specification.
|
|
Returns t if valid, nil if invalid (should be ignored)."
|
|
(cond
|
|
;; REPLY_TO must be URL#timestamp format (accepts both +0200 and +02:00 timezone formats)
|
|
((string= prop-name "REPLY_TO")
|
|
(string-match-p "^https?://[^#]+#[0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}T[0-9]\\{2\\}:[0-9]\\{2\\}:[0-9]\\{2\\}[+-][0-9]\\{2\\}" value))
|
|
;; INCLUDE must be URL#timestamp format (for boosting posts)
|
|
((string= prop-name "INCLUDE")
|
|
(string-match-p "^https?://[^#]+#[0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}T[0-9]\\{2\\}:[0-9]\\{2\\}:[0-9]\\{2\\}[+-][0-9]\\{2\\}" value))
|
|
;; POLL_END must be RFC 3339 format
|
|
((string= prop-name "POLL_END")
|
|
(string-match-p "^[0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}T[0-9]\\{2\\}:[0-9]\\{2\\}:[0-9]\\{2\\}" value))
|
|
;; GROUP must be: groupname relayurl format
|
|
;; Group name can have spaces, URL must be the last part
|
|
((string= prop-name "GROUP")
|
|
(let ((parts (split-string value "\\s-+" t)))
|
|
(and (>= (length parts) 2)
|
|
(string-match-p "^https?://" (car (last parts))))))
|
|
;; LANG must be language code (2-5 chars, letters/hyphens only)
|
|
((string= prop-name "LANG")
|
|
(string-match-p "^[a-z]\\{2,5\\}\\(-[a-z]\\{2,3\\}\\)?$" value))
|
|
;; TAGS must be space-separated alphanumeric words
|
|
((string= prop-name "TAGS")
|
|
(string-match-p "^[a-zA-Z0-9_-]+\\(\\s-+[a-zA-Z0-9_-]+\\)*$" value))
|
|
;; CLIENT, POLL_OPTION, MOOD - just check they're reasonable text
|
|
((member prop-name '("CLIENT" "POLL_OPTION" "MOOD"))
|
|
(and (< (length value) 200) ; Reasonable length limit
|
|
(null (string-match-p "[\n\r]" value)))) ; No newlines
|
|
;; VISIBILITY must be either "public" or "mention"
|
|
((string= prop-name "VISIBILITY")
|
|
(or (string= value "public")
|
|
(string= value "mention")))
|
|
;; Default: accept other properties
|
|
(t t)))
|
|
|
|
(defun org-social-parser--validate-id (id)
|
|
"Validate that ID is in proper RFC 3339 format according to specification.
|
|
Accepts formats: ####-##-##T##:##:##+##:## or ####-##-##T##:##:##-####"
|
|
(when (stringp id)
|
|
(or (string-match-p "^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}[+-][0-9]\{2\}:[0-9]\{2\}$" id)
|
|
(string-match-p "^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}[+-][0-9]\{4\}$" id))))
|
|
|
|
(defun org-social-parser--extract-mentioned-urls (text)
|
|
"Extract all org-social URLs mentioned in TEXT.
|
|
Returns a list of URLs from [[org-social:URL][name]] links."
|
|
(when (stringp text)
|
|
(let ((urls '())
|
|
(pos 0))
|
|
(while (string-match "\\[\\[org-social:\\([^]]+\\)\\]\\[\\([^]]+\\)\\]\\]" text pos)
|
|
(let ((url (match-string 1 text)))
|
|
(push url urls)
|
|
(setq pos (match-end 0))))
|
|
(reverse urls))))
|
|
|
|
(provide 'org-social-parser)
|
|
;;; org-social-parser.el ends here
|