# Mirror of https://github.com/tanrax/org-social-rss-bridge
# Synced 2026-01-07 05:43:34 +01:00
# 171 lines, 5.2 KiB, Python
import os
|
|
from flask import Flask, Response
|
|
from flask_caching import Cache
|
|
import feedparser
|
|
from datetime import datetime
|
|
from html2text import html2text
|
|
import re
|
|
import urllib.request
|
|
import xml.etree.ElementTree as ET
|
|
|
|
app = Flask(__name__)


def _language_code(raw, default='en'):
    """Reduce a POSIX-style locale string to its bare language code.

    Args:
        raw: Value read from the environment, e.g. ``"es"``,
            ``"en_US.UTF-8"`` or ``"C.UTF-8"``.
        default: Code returned when *raw* carries no language
            (empty, ``C`` or ``POSIX``).

    Returns:
        The part of *raw* before any ``_TERRITORY``, ``.codeset`` or
        ``@modifier`` suffix, or *default* when nothing usable remains.
    """
    code = (raw or '').strip()
    for separator in ('.', '@', '_'):
        code = code.partition(separator)[0]
    if not code or code.upper() in ('C', 'POSIX'):
        return default
    return code


# Environment variables
RSS_FEED_URL = os.getenv('RSS_FEED_URL', '')
NICK = os.getenv('NICK', 'rss-bridge')
TITLE = os.getenv('TITLE', 'RSS Bridge')
DESCRIPTION = os.getenv('DESCRIPTION', 'RSS to Org Social bridge')
AVATAR = os.getenv('AVATAR', '')
CONTACT = os.getenv('CONTACT', '')
# LANG is also the standard POSIX locale variable, so it is frequently
# pre-set to something like "C.UTF-8" or "en_US.UTF-8". Strip the locale
# decorations so the value written to each post's :LANG: property is a plain
# language code; an explicit value such as "es" passes through unchanged.
LANG = _language_code(os.getenv('LANG', 'en'))
PORT = int(os.getenv('PORT', '5000'))
DEBUG = os.getenv('DEBUG', 'false').lower() in ('true', '1', 'yes')
CACHE_TIMEOUT = int(os.getenv('CACHE_TIMEOUT', '300'))  # 5 minutes default

# Configure cache
app.config['CACHE_TYPE'] = 'SimpleCache'
app.config['CACHE_DEFAULT_TIMEOUT'] = CACHE_TIMEOUT
cache = Cache(app)
|
|
|
|
|
|
def html_to_org(html_content):
    """Convert an HTML fragment to Org mode markup.

    The HTML is first rendered to markdown-like text by ``html2text``; the
    markdown links, bold and emphasis markers in that text are then rewritten
    to their Org equivalents.

    Args:
        html_content: HTML source as a string. Falsy values (``None`` or
            ``""``) are accepted.

    Returns:
        The converted text with leading/trailing whitespace removed, or
        ``""`` when *html_content* is falsy.
    """
    if not html_content:
        return ""

    # Convert HTML to markdown-like text
    text = html2text(html_content)

    # Clean up excessive newlines
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Markdown link [label](url) -> Org link [[url][label]]
    text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', r'[[\2][\1]]', text)

    # Markdown bold **text** -> Org bold *text*
    text = re.sub(r'\*\*([^\*]+)\*\*', r'*\1*', text)

    # Markdown emphasis _text_ -> Org italic /text/
    # An underscore only opens emphasis when it is not glued to a preceding
    # word character or '/', and only closes it when no word character
    # follows. Without these guards, underscores inside URLs (already moved
    # into [[...]] above) and snake_case words were rewritten to slashes,
    # corrupting the links.
    text = re.sub(r'(?<![\w/])_([^_]+)_(?!\w)', r'/\1/', text)

    # Clean up any remaining artifacts
    return text.strip()
|
|
|
|
|
|
def _one_line(value):
    """Collapse every run of whitespace (newlines included) to one space."""
    return ' '.join(str(value).split())


def _entry_timestamp(entry):
    """Return the ``:ID:`` timestamp for a feed entry, labelled as UTC."""
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    for field in ('published_parsed', 'updated_parsed'):
        parsed = getattr(entry, field, None)
        if parsed:
            # NOTE(review): assumes feedparser's *_parsed tuples are already
            # normalised to UTC, as its documentation states - confirm.
            moment = datetime(*parsed[:6])
            break
    else:
        # No date in the entry: use the current time. It must be taken in
        # UTC because the format string below hard-codes the +0000 offset.
        moment = datetime.now(timezone.utc)
    return moment.strftime('%Y-%m-%dT%H:%M:%S+0000')


def _entry_html(entry):
    """Return the richest body available: content, summary, description."""
    if getattr(entry, 'content', None):
        return entry.content[0].value
    return (
        getattr(entry, 'summary', None)
        or getattr(entry, 'description', None)
        or ""
    )


def parse_rss_to_org(feed_url):
    """Parse an RSS/Atom feed and render it as an Org Social file.

    Args:
        feed_url: Location of the feed, handed to ``feedparser.parse``.

    Returns:
        The Org Social document as a single string. When *feed_url* is empty
        or the parsed feed has no entries, a human-readable error message is
        returned instead (no exception is raised for those cases).

    Notes:
        Entries are emitted oldest first. Entries that carry neither a
        published nor an updated date get the current UTC time as their ID,
        so those IDs are not stable between calls.
    """
    if not feed_url:
        return "Error: RSS_FEED_URL environment variable not set"

    # Parse the feed
    feed = feedparser.parse(feed_url)

    # Check if feed has entries
    if not feed.entries:
        error_msg = "No entries found in feed. This might be due to an unsupported feed format."
        if feed.bozo and hasattr(feed, 'bozo_exception'):
            error_msg += f" Parser error: {feed.bozo_exception}"
        return error_msg

    # Build the Org Social file
    org_content = []

    # Header metadata. Values taken from the feed are flattened to one line
    # because each "#+KEY:" header must fit on a single line.
    feed_title = TITLE or _one_line(feed.feed.get('title', 'RSS Bridge'))
    org_content.append(f"#+TITLE: {feed_title}")
    org_content.append(f"#+NICK: {NICK}")

    if DESCRIPTION:
        org_content.append(f"#+DESCRIPTION: {DESCRIPTION}")
    elif hasattr(feed.feed, 'subtitle'):
        org_content.append(f"#+DESCRIPTION: {_one_line(feed.feed.subtitle)}")

    if AVATAR:
        org_content.append(f"#+AVATAR: {AVATAR}")
    elif hasattr(feed.feed, 'image') and 'href' in feed.feed.image:
        org_content.append(f"#+AVATAR: {feed.feed.image.href}")

    if CONTACT:
        org_content.append(f"#+CONTACT: {CONTACT}")

    org_content.append("")
    org_content.append("* Posts")

    # Process each entry (reverse order: oldest to newest)
    for entry in reversed(feed.entries):
        org_content.append("**")
        org_content.append(":PROPERTIES:")

        # ID (timestamp) - required field
        org_content.append(f":ID: {_entry_timestamp(entry)}")
        org_content.append(f":LANG: {LANG}")

        # Tags. Skip tags whose term is missing or None: joining a None
        # would raise TypeError and abort the whole feed.
        terms = [
            tag.term
            for tag in (getattr(entry, 'tags', None) or [])
            if getattr(tag, 'term', None)
        ]
        if terms:
            org_content.append(f":TAGS: {' '.join(terms)}")

        org_content.append(":END:")
        org_content.append("")

        # Title, flattened so an embedded newline cannot split the heading
        title = _one_line(getattr(entry, 'title', None) or '')
        if title:
            org_content.append(f"*** {title}")
            org_content.append("")

        # Content
        content = _entry_html(entry)
        if content:
            org_content.append(html_to_org(content))
            org_content.append("")

        # Link to original
        if getattr(entry, 'link', None):
            org_content.append(f"[[{entry.link}][Original post]]")
            org_content.append("")

        org_content.append("")

    return '\n'.join(org_content)
|
|
|
|
|
|
@app.route('/')
@cache.cached()
def index():
    """Serve the Org Social rendering of the configured feed as plain text.

    The response is cached by ``cache.cached()``; no explicit timeout is
    passed, so the cache's configured default applies.

    Returns:
        A ``Response`` whose body is whatever ``parse_rss_to_org`` produced
        for ``RSS_FEED_URL`` (the Org document, or its error message).
    """
    org_content = parse_rss_to_org(RSS_FEED_URL)

    # The full header value goes in ``content_type``. Passed as ``mimetype``,
    # Werkzeug would append its own charset parameter to a text/* type and
    # the header would carry "charset=utf-8" twice.
    return Response(org_content, content_type='text/plain; charset=utf-8')
|
|
|
|
|
|
@app.route('/health')
def health():
    """Report service status together with the configured feed URL."""
    payload = {
        'status': 'ok',
        'rss_feed_url': RSS_FEED_URL,
    }
    return payload
|
|
|
|
|
|
if __name__ == '__main__':
    # Started directly as a script: run Flask's built-in server, listening on
    # every interface, with the port and debug flag taken from the
    # environment-derived settings.
    run_options = {
        'host': '0.0.0.0',
        'port': PORT,
        'debug': DEBUG,
    }
    app.run(**run_options)
|