mirror of
https://github.com/tanrax/org-social-static-preview
synced 2026-01-08 06:13:35 +01:00
Source blocks (#+BEGIN_SRC / #+END_SRC) were not rendering correctly in preview cards: the content was displayed as raw Org Mode syntax instead of formatted code blocks.

This fix:
- Detects source blocks using regex pattern matching
- Extracts the code content and the language identifier
- HTML-escapes the code content to prevent XSS
- Replaces blocks with placeholders before other formatting is applied
- Restores blocks as styled <pre><code> elements after line break conversion

Fixes an issue where comments and code in source blocks were invisible in share preview links.
366 lines
12 KiB
Python
366 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import re
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from jinja2 import Environment, FileSystemLoader
|
|
import argparse
|
|
|
|
|
|
class OrgSocialParser:
    """Parse an Org Social feed file into global metadata and a list of posts.

    After ``parse_file`` runs, ``metadata`` maps the recognised ``#+KEY:``
    keywords to their values and ``posts`` holds one dict per post. Each post
    dict contains the properties found in its ``:PROPERTIES:`` drawer plus a
    ``"content"`` entry with the post body.
    """

    # Global ``#+KEY: value`` keywords collected into ``self.metadata``.
    METADATA_KEYS = ("TITLE", "NICK", "DESCRIPTION", "AVATAR")

    def __init__(self):
        self.metadata = {}
        self.posts = []

    def parse_file(self, file_path):
        """Parse the org social file and extract metadata and posts.

        Args:
            file_path: Path of the UTF-8 encoded feed file to read.

        Returns:
            The list of parsed post dicts (also stored in ``self.posts``).
        """
        # Reset state so one parser instance can be reused for several files.
        self.metadata = {}
        self.posts = []

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        self._extract_metadata(content)
        self._extract_posts(content)

        return self.posts

    def _extract_metadata(self, content):
        """Collect the global ``#+KEY: value`` keywords into ``self.metadata``.

        Only the first occurrence of each key is used. A keyword whose value
        is empty is skipped.
        """
        for key in self.METADATA_KEYS:
            # ``[ \t]*`` (not ``\s*``) after the colon keeps the match on one
            # line, so an empty ``#+TITLE:`` cannot capture the next line.
            match = re.search(
                rf"^\s*#\+{key}:[ \t]*(\S.*)$", content, re.MULTILINE
            )
            if match:
                self.metadata[key] = match.group(1).strip()

    def _extract_posts(self, content):
        """Extract every post under the ``* Posts`` heading into ``self.posts``.

        A post starts at a line containing only ``**`` and runs until the next
        such line or the end of the text. Posts without an ``ID`` property are
        dropped.
        """
        section = re.search(r"^\*\s+Posts\s*$", content, re.MULTILINE)
        if not section:
            print("Posts section not found")
            return

        posts_content = content[section.end():]

        # Each bare ``**`` line is a post header.
        headers = list(re.finditer(r"^(\*\*)\s*$", posts_content, re.MULTILINE))
        if not headers:
            print("No headers found in Posts section")
            return

        print(f"Found {len(headers)} headers")

        # A post body ends where the next header begins; the last one runs to
        # the end of the section.
        body_ends = [header.start() for header in headers[1:]]
        body_ends.append(len(posts_content))

        for header, end_pos in zip(headers, body_ends):
            block = posts_content[header.end():end_pos].strip()
            if not block:
                continue

            post = self._parse_post_block(block)
            if post and post.get("ID"):
                self.posts.append(post)
                print(f"Post added with ID: {post.get('ID')}")

    def _parse_post_block(self, block):
        """Parse a single post block.

        Args:
            block: Text of one post (already stripped), normally a
                ``:PROPERTIES:`` drawer followed by the post body.

        Returns:
            A dict with one entry per drawer property and a ``"content"``
            entry holding the text after ``:END:``. If the block has no
            ``:END:`` marker, the whole block is the content.
        """
        post = {}

        properties_match = re.search(
            r":PROPERTIES:\s*\n(.*?)\n:END:", block, re.DOTALL
        )
        if properties_match:
            for raw_line in properties_match.group(1).split("\n"):
                line = raw_line.strip()
                if not line.startswith(":"):
                    continue
                # ``:KEY: value`` -- split at the second colon only, so values
                # that contain colons (timestamps, URLs) stay intact.
                key, separator, value = line[1:].partition(":")
                key = key.strip()
                if separator and key:
                    post[key] = value.strip()

        # The body is everything after ":END:". The marker may be the very
        # last thing in the block (a post with no body, e.g. mood-only), so
        # end-of-text is accepted as well as a newline; otherwise the raw
        # drawer would be reported as the content.
        end_match = re.search(r":END:\s*(?:\n|\Z)", block)
        if end_match:
            post["content"] = block[end_match.end():].strip()
        else:
            # No properties block, entire block is content
            post["content"] = block

        return post
|
|
|
|
|
|
class PreviewGenerator:
    """Render the HTML preview page of a single post from a Jinja2 template."""

    def __init__(self, template_dir=".", template_name="template.html"):
        """Load the template and register the ``og_description`` filter.

        Args:
            template_dir: Directory the Jinja2 loader searches for templates.
            template_name: File name of the template inside ``template_dir``.
        """
        self.env = Environment(loader=FileSystemLoader(template_dir))
        self.env.filters["og_description"] = self._og_description
        self.template = self.env.get_template(template_name)

    @staticmethod
    def _og_description(value, max_length=120):
        """Flatten text to a single line, truncated for a description tag.

        Whitespace runs (including line breaks) become single spaces, anything
        that looks like an HTML tag is removed, and text longer than
        ``max_length`` characters is cut and suffixed with ``...``.
        """
        # ``\s+`` also covers "\r\n", "\n" and "\r", so no separate newline
        # replacement is needed.
        text = re.sub(r"\s+", " ", value)
        # HTML tag filter
        text = re.sub(r"<[^>]+>", "", text)
        # Removing tags can leave two spaces side by side.
        text = re.sub(r" +", " ", text)
        if len(text) > max_length:
            text = text[:max_length].rstrip() + "..."
        return text.strip()

    def generate_preview(self, post, metadata):
        """Generate HTML preview for a single post.

        Args:
            post: Post dict (properties plus ``"content"``).
            metadata: Feed-level metadata dict; ``FEED_URL`` is read from it
                when present.

        Returns:
            The rendered template as a string.
        """
        feed_url = metadata.get("FEED_URL", "")
        context = self._prepare_context(post, metadata, feed_url)
        return self.template.render(**context)

    def _prepare_context(self, post, metadata, feed_url):
        """Prepare context data for template rendering.

        Returns:
            A dict of template variables built from the post properties, the
            feed metadata and values derived from them (formatted content,
            formatted date, tag list, post URL and boolean ``has_*`` flags).
        """
        post_id = post.get("ID", "")
        content = post.get("content", "")
        mood = post.get("MOOD", "")
        tags = post.get("TAGS", "")
        reply_to = post.get("REPLY_TO", "")

        nick = metadata.get("NICK", "User")
        avatar_url = metadata.get("AVATAR", "")

        # The post URL is only meaningful when both parts are known.
        post_url = f"{feed_url}#{post_id}" if feed_url and post_id else ""

        return {
            "post_id": post_id,
            "content": content,
            "formatted_content": self._format_content(content, mood, reply_to),
            "mood": mood,
            "language": post.get("LANG", "es"),
            "tags": tags.split() if tags else [],
            "tags_string": tags,
            "reply_to": reply_to,
            "client": post.get("CLIENT", ""),
            "is_reply": bool(reply_to),
            "has_mood": bool(mood),
            "has_tags": bool(tags),
            "has_content": bool(content.strip()),
            "nick": nick,
            "title": metadata.get("TITLE", "socia.org"),
            "description": metadata.get("DESCRIPTION", ""),
            "avatar_url": avatar_url,
            "has_avatar": bool(avatar_url),
            "user_initial": nick[0].upper() if nick else "U",
            "formatted_time": self._format_timestamp(post_id),
            # The post ID is passed through unchanged as the timestamp.
            "timestamp": post_id,
            "post_url": post_url,
        }

    def _format_content(self, content, mood, reply_to):
        """Format post content as an HTML fragment.

        Source blocks (``#+BEGIN_SRC`` ... ``#+END_SRC``) become HTML-escaped
        ``<pre><code>`` elements, Org links become ``<a>`` elements and line
        breaks become ``<br>``.

        Args:
            content: Raw post body in Org syntax.
            mood: Mood of the post; rendered on its own when the body is empty.
            reply_to: Accepted for interface compatibility; not used here.

        Returns:
            The HTML fragment, or ``"No content"`` when nothing is left.
        """
        # Function-scope import: ``html`` is not among the file's imports.
        from html import escape

        if not content.strip() and mood:
            return f'<span style="font-size: 20px;">{mood}</span>'

        # Source blocks are swapped for placeholders first, so the link and
        # line-break rewriting below cannot touch the code inside them.
        code_blocks = []

        # The language is optional and may be followed by header arguments
        # (e.g. ":results output"). Both are matched with horizontal
        # whitespace only, so the first line of code is never mistaken for
        # the language of a block that declares none.
        code_block_pattern = (
            r"#\+BEGIN_SRC(?:[ \t]+([\w-]+))?(?:[ \t][^\n]*)?\s*\n(.*?)#\+END_SRC"
        )

        def stash_code_block(match):
            lang = match.group(1) or "text"
            # Drop the newline(s) in front of END_SRC, then escape &, < and >
            # so the code is shown literally instead of parsed as markup.
            code = escape(match.group(2).rstrip("\n"), quote=False)
            placeholder = f"___CODE_BLOCK_{len(code_blocks)}___"
            code_blocks.append(
                {"lang": lang, "code": code, "placeholder": placeholder}
            )
            return placeholder

        formatted = re.sub(
            code_block_pattern,
            stash_code_block,
            content,
            flags=re.DOTALL | re.IGNORECASE,
        )

        # NOTE(review): text outside source blocks is inserted without HTML
        # escaping, and link targets are copied into href as-is -- confirm the
        # feed content is trusted.

        # Handle org-social mentions
        formatted = re.sub(
            r"\[\[org-social:([^\]]+)\]\[([^\]]+)\]\]",
            r'<a href="#" style="color: #1d9bf0;">@\2</a>',
            formatted,
        )

        # Handle regular links
        formatted = re.sub(
            r"\[\[([^\]]+)\]\[([^\]]+)\]\]",
            r'<a href="\1" style="color: #1d9bf0;" target="_blank">\2</a>',
            formatted,
        )

        # Handle simple URLs
        formatted = re.sub(
            r"\[\[([^\]]+)\]\]",
            r'<a href="\1" style="color: #1d9bf0;" target="_blank">\1</a>',
            formatted,
        )

        # Convert line breaks
        formatted = formatted.replace("\n", "<br>")

        # Restore code blocks only now, so the newlines inside <pre> are kept
        # rather than turned into <br>.
        for block in code_blocks:
            code_html = f'<pre style="background-color: #f6f8fa; padding: 16px; border-radius: 6px; overflow-x: auto; margin: 10px 0;"><code class="language-{block["lang"]}">{block["code"]}</code></pre>'
            formatted = formatted.replace(block["placeholder"], code_html)

        # Mood is now displayed in the header, not in content

        return formatted or "No content"

    def _format_timestamp(self, timestamp):
        """Format timestamp for display.

        Args:
            timestamp: ISO 8601 date-time string (a trailing ``Z`` is accepted).

        Returns:
            The date as ``YYYY-MM-DD``, or ``"2024-01-01"`` when the value
            cannot be parsed.
        """
        try:
            dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
            return dt.strftime("%Y-%m-%d")
        except (AttributeError, TypeError, ValueError):
            # Not a string, or not a parseable ISO timestamp.
            return "2024-01-01"
|
|
|
|
|
|
class OrgSocialPreviewGenerator:
    """Build one HTML preview file per post of an Org Social feed."""

    def __init__(
        self, social_file, preview_dir, template_dir=".", template_name="template.html"
    ):
        """Set up the parser, the renderer and the output directory.

        Args:
            social_file: Path of the feed file to parse.
            preview_dir: Directory the ``.html`` previews are written to; it
                is created (with missing parents) if it does not exist.
            template_dir: Directory containing the Jinja2 template.
            template_name: File name of the template.
        """
        self.social_file = Path(social_file).resolve()
        self.preview_dir = Path(preview_dir)
        self.parser = OrgSocialParser()
        self.generator = PreviewGenerator(template_dir, template_name)

        # parents=True so a nested output path such as "public/preview" works
        # even when "public" has not been created yet.
        self.preview_dir.mkdir(parents=True, exist_ok=True)

    def generate_all_previews(self):
        """Generate all preview files.

        Existing ``*.html`` files in the preview directory are removed and a
        fresh file is written for every post that has an ID.

        Returns:
            ``0`` on success, ``1`` if any exception occurred (the error and
            its traceback are printed).
        """
        try:
            # Parse before deleting anything: if the feed cannot be read, the
            # previews from the previous run are left in place.
            posts = self.parser.parse_file(self.social_file)
            print(f"Processed {len(posts)} posts")

            # Clear existing HTML files
            print("Cleaning existing HTML files...")
            deleted_count = 0
            for existing_file in self.preview_dir.glob("*.html"):
                existing_file.unlink()
                deleted_count += 1
            print(f"Deleted {deleted_count} files")

            # Generate new previews
            generated_count = 0
            for post in posts:
                post_id = post.get("ID", "")
                if not post_id:
                    continue

                # ":" and "+" appear in timestamp IDs; swap them for
                # characters that are safe in file names.
                safe_filename = post_id.replace(":", "-").replace("+", "plus")
                preview_path = self.preview_dir / f"{safe_filename}.html"

                html = self.generator.generate_preview(post, self.parser.metadata)

                with open(preview_path, "w", encoding="utf-8") as f:
                    f.write(html)

                print(f"Generated: {preview_path.name}")
                generated_count += 1

            print(f"Completed: {generated_count} files generated")

        except Exception as e:
            # Top-level boundary of the run: report the failure and turn it
            # into a non-zero exit status instead of propagating.
            print(f"Error: {e}")
            import traceback

            traceback.print_exc()
            return 1

        return 0
|
|
|
|
|
|
def main():
    """Command-line entry point: validate the inputs and build all previews.

    Returns:
        ``1`` if the feed file or the template is missing; otherwise the
        status returned by ``OrgSocialPreviewGenerator.generate_all_previews``.
    """
    cli = argparse.ArgumentParser(
        description="Generate HTML previews for Org Social posts"
    )
    # (long flag, short flag, default) for every supported option.
    option_table = (
        ("--social-file", "-s", "social.org"),
        ("--preview-dir", "-p", "preview"),
        ("--template-dir", "-td", "."),
        ("--template-name", "-tn", "template.html"),
    )
    for long_flag, short_flag, default_value in option_table:
        cli.add_argument(long_flag, short_flag, default=default_value)

    options = cli.parse_args()

    # Both input files must exist before any work is done.
    if not Path(options.social_file).exists():
        print(f"Error: {options.social_file} not found")
        return 1

    template_file = Path(options.template_dir) / options.template_name
    if not template_file.exists():
        print(f"Error: {template_file} not found")
        return 1

    # Build the generator and hand its status back to the caller.
    runner = OrgSocialPreviewGenerator(
        options.social_file,
        options.preview_dir,
        options.template_dir,
        options.template_name,
    )
    return runner.generate_all_previews()
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit with main()'s status. SystemExit is raised directly because the
    # bare ``exit`` helper is added by the ``site`` module and is not
    # guaranteed to exist (for example under ``python -S``).
    raise SystemExit(main())
|