From d3262d89b5124ff532a0f680ba5e8aa451d06cd0 Mon Sep 17 00:00:00 2001 From: Dom Rodriguez Date: Tue, 18 Aug 2020 00:24:10 +0100 Subject: [PATCH] [init](codebase): Initial commit Total rewrite of singlerss, better and better than before! Signed-off-by: Dom Rodriguez --- .env.sample | 3 + .gitignore | 1 + LICENSE | 201 ++++++++++++++++++++++++++++++++++++ README.md | 37 +++++++ requirements.txt | 5 + res/cron/crontab | 5 + res/systemd/rss.service | 26 +++++ res/systemd/rss.timer | 8 ++ singlerss.py | 223 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 509 insertions(+) create mode 100644 .env.sample create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 res/cron/crontab create mode 100644 res/systemd/rss.service create mode 100644 res/systemd/rss.timer create mode 100755 singlerss.py diff --git a/.env.sample b/.env.sample new file mode 100644 index 0000000..bfd8665 --- /dev/null +++ b/.env.sample @@ -0,0 +1,3 @@ +SINGLERSS_FEED_OUT_PATH= +SINGLERSS_FEED_LIST_PATH= +SINGLERSS_FEED_OUT_TYPE= diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f10862a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/.env diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d1438fd --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +singlerss +========= + +# Description + +singlerss combines all feeds described in a OPML file into one feed. This can +either be outputted into `stdout` or a file, as specifed by program arguments, +and configured by the environment variables. + +# Configuration + +SingleRSS is configured by environment variables. + +See `.env.sample`. You _must_ copy `.env.sample` to `.env`. + +`SINGLERSS_FEED_OUT_PATH` defines the relative OR absolute path to output the +feed to, _IF_ `SINGLERSS_FEED_OUT_TYPE` is set to `file`. If +`SINGLERSS_FEED_OUT_TYPE` is set to `stdout`, you must redirect output to the +file you want it written to. + +`SINGLERSS_FEED_LIST_PATH` must be set to the input list of feeds you want to be +collated into one feed. This _must_ be a newline delimited file of URLs. + +## Running + +You may run this directly, after sourcing `.env` and exporting the variables, +with `./singlerss.py`. Alternatively, I have provided a systemd unit and timer, +which I will offer support for, and a basic crontab. I do not use cron, so I +cannot offer support for it. + +# Licensing + +This program is [licensed][license] under the Apache License 2.0. + +Copyright (c) Dom Rodriguez (shymega) 2020. 
+ +[license]: /LICENSE diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9e66167 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +feedgen==0.9.0 +feedparser==5.2.1 +lxml==4.5.2 +python-dateutil==2.8.1 +six==1.15.0 diff --git a/res/cron/crontab b/res/cron/crontab new file mode 100644 index 0000000..6328ac0 --- /dev/null +++ b/res/cron/crontab @@ -0,0 +1,5 @@ +# For stdout to file +0 * * * * cd /opt/rss && . ./.env && /opt/rss/singlerss.py > /var/www/html/feeds.xml + +# For file direct. +0 * * * * cd /opt/rss && . ./.env && /opt/rss/singlerss.py diff --git a/res/systemd/rss.service b/res/systemd/rss.service new file mode 100644 index 0000000..c358582 --- /dev/null +++ b/res/systemd/rss.service @@ -0,0 +1,26 @@ +[Unit] +Description=Generate combined RSS feed. + + +[Service] +Type=oneshot +# Make sure to set user and group +# to your setup. +User=nginx +Group=nobody + +# Make sure to customise these to your +# system! +WorkingDirectory=/var/www/html/feed.xml +# And this. +EnvironmentFile=/opt/singlerss/.env +# And this. +ExecStart=/opt/rss/singlerss.py + +# These don't work on older systemd versions. +# In that case, you should configure singlerss +# to output to file, as documented in the README, +# and comment these directives out. 
# ---------------------------------------------------------------------------
# Patch residue that shared this span with singlerss.py in the collapsed
# patch. It is not part of the script; it is kept verbatim (commented out) so
# that no content of the original patch is lost.
#
#   (tail of res/systemd/rss.service)
#   +StandardInput=null
#   +StandardError=journal
#   +StandardOutput=file:$SINGLERSS_FEED_OUT_PATH
#
#   diff --git a/res/systemd/rss.timer b/res/systemd/rss.timer
#   new file mode 100644
#   index 0000000..75a2766
#   --- /dev/null
#   +++ b/res/systemd/rss.timer
#   @@ -0,0 +1,8 @@
#   +[Unit]
#   +Description=Hourly refresh of singlerss
#   +
#   +[Timer]
#   +OnCalendar=hourly
#   +
#   +[Install]
#   +WantedBy=timers.target
#
#   diff --git a/singlerss.py b/singlerss.py
#   new file mode 100755
#   index 0000000..c02b876
#   --- /dev/null
#   +++ b/singlerss.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python3

# Copyright (c) Dom Rodriguez 2020
# Licensed under the Apache License 2.0

"""Combine several RSS/Atom feeds into a single RSS feed.

Configuration is read from the environment:

* ``SINGLERSS_FEED_LIST_PATH`` - newline-delimited file of feed URLs.
* ``SINGLERSS_FEED_OUT_TYPE``  - ``stdout`` or ``file``.
* ``SINGLERSS_FEED_OUT_PATH``  - destination used when the type is ``file``.
"""

import logging
import os
import sys

import feedparser
from feedgen.feed import FeedGenerator

try:
    # Nothing in this script uses listparser and it is not listed in
    # requirements.txt, so a missing install must not stop the program.
    import listparser  # noqa: F401
except ImportError:
    listparser = None

log = None
fg = None
FEED_OUT_PATH = None
FEED_OUT_TYPE = None
FEED_LIST_PATH = None
FEEDS = []

# Canonical location of the combined feed (used for both id and self link).
_FEED_URL = "https://rss.shymega.org.uk/feed.xml"
# Fallback timestamp for entries without a usable publish date.
_EPOCH = "1970-01-01T00:00:00+00:00"
# Keys passed on to feedgen's ``author()``; anything else is dropped.
_AUTHOR_KEYS = ("name", "email", "uri")
_DEFAULT_AUTHOR = {"name": "Unspecified", "email": "unspecified@example.com"}
_OUTPUT_TYPES = ("stdout", "file")


def setup_logging() -> None:
    """
    Initialise the module-level ``log`` logger.

    Records are written to stderr. The level is ERROR, so the debug and
    warning messages emitted elsewhere in this script are suppressed.
    """
    global log

    log = logging.getLogger("singlerss")
    # Guard against stacking duplicate handlers if called more than once.
    if not log.handlers:
        out_handler = logging.StreamHandler(sys.stderr)
        out_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
            ))
        log.addHandler(out_handler)
    log.setLevel(logging.ERROR)

    return None


def init_feed() -> None:
    """
    Create the global ``fg`` FeedGenerator and set the channel attributes.

    Exits the process with status 1 if the generator cannot be set up.
    """
    global fg

    log.debug("Initialising the feed...")

    try:
        fg = FeedGenerator()
        # Setup [root] feed attributes
        fg.id(_FEED_URL)
        fg.title("SingleRSS - Combined Feed")
        fg.generator("SingleRSS/v1.0.0")
        fg.link(href=_FEED_URL, rel="self")
        fg.subtitle("Combined feed for RSS feeds")
        fg.language('en')
    except Exception:
        # ``Exception`` rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed.
        log.exception("Error initialising the feed!")
        sys.exit(1)

    log.debug("Feed initialised!")

    return None


def parse_rss_feed(url) -> feedparser.FeedParserDict:
    """
    Parse the feed at ``url`` with feedparser and return the result.

    Exits the process with status 1 if feedparser raises, because the
    combined feed is only produced when every input feed can be read.
    """
    log.debug("Parsing RSS feed..")

    try:
        return feedparser.parse(url)
    except Exception:
        log.exception("Failed to parse RSS feed.")
        log.error("Cannot continue, we want all the feeds to work!")
        sys.exit(1)


def _load_feed_urls(path):
    """Return the non-blank lines of the feed list file at ``path``."""
    with open(path, "r", encoding="utf-8") as infile:
        stripped = (line.strip() for line in infile)
        return [line for line in stripped if line]


def _normalise_author(author):
    """Reduce a parsed author mapping to the keys the feed generator takes."""
    cleaned = {key: author[key] for key in _AUTHOR_KEYS if author.get(key)}
    # Parsed authors carry their URL under ``href``; the generator calls the
    # same thing ``uri``.
    if "uri" not in cleaned and author.get("href"):
        cleaned["uri"] = author["href"]
    return cleaned


def _entry_authors(entry):
    """Return the usable authors of ``entry`` (possibly an empty list)."""
    sources = entry.get("sources")
    candidates = None
    if isinstance(sources, dict):
        candidates = sources.get("authors")
    # A missing ``sources`` key must not hide the entry's own authors.
    if not candidates:
        candidates = entry.get("authors") or []
    authors = (_normalise_author(author) for author in candidates)
    return [author for author in authors if author]


def _populate_entry(fe, entry):
    """Copy one parsed ``entry`` onto the new output feed entry ``fe``."""
    entry_id = entry.get("id")
    if not entry_id:
        log.warning("Empty id attribute, defaulting..")
        entry_id = "about:blank"
    fe.id(entry_id)

    title = entry.get("title")
    if not title:
        log.warning("Empty title attribute, defaulting..")
        title = "Unspecified"
    fe.title(title)

    link = entry.get("link")
    if not link:
        log.warning("Empty link attribute, defaulting..")
        link = "about:blank"
    fe.link(href=link)

    authors = _entry_authors(entry)
    if not authors:
        log.warning("Empty authors attribute, defaulting..")
        authors = [dict(_DEFAULT_AUTHOR)]
    for author in authors:
        fe.author(author)

    summary = entry.get("summary")
    description = entry.get("description")
    if summary:
        fe.summary(summary)
        fe.description(summary)
    elif description:
        fe.description(description)
        fe.summary(description)
        fe.content(description)
    else:
        log.warning(
            "Empty description OR summary attribute, defaulting..")
        fe.description("Unspecified")
        fe.summary("Unspecified")

    published = entry.get("published")
    if not published:
        log.warning("Empty publish attribute, defaulting..")
        published = _EPOCH
    try:
        fe.published(published)
        fe.updated(published)
    except (ValueError, OverflowError, TypeError):
        # Unparseable or timezone-less date: fall back to the Unix epoch.
        log.warning("Invalid publish attribute, defaulting..")
        fe.published(_EPOCH)
        fe.updated(_EPOCH)


def main():
    """
    Read the feed list and add every entry of every feed to ``fg``.

    Uses the globals ``FEED_LIST_PATH`` (input file), ``FEEDS`` (extended
    with the URLs that were read) and ``fg`` (must already be initialised).
    """
    log.debug("Loading feed list into memory..")
    FEEDS.extend(_load_feed_urls(FEED_LIST_PATH))

    log.debug("Iterating over [input] feeds...")
    for feed in FEEDS:
        rss = parse_rss_feed(feed)
        log.debug("Iterating over [input] feed entries..")
        for entry in rss.get("entries") or []:
            log.debug("New feed entry created.")
            _populate_entry(fg.add_entry(), entry)


def _require_env(name):
    """Return environment variable ``name`` or exit with status 1."""
    try:
        return os.environ[name]
    except KeyError:
        log.error("*** Environment variable missing! ***")
        log.error("`%s` variable missing.", name)
        log.error("This program will NOT run without that set.")
        sys.exit(1)


if __name__ == "__main__":
    setup_logging()
    log.debug("Initialising...")

    log.debug("Assiging variables..")
    # Configuration is specified with environment variables.
    FEED_OUT_PATH = _require_env("SINGLERSS_FEED_OUT_PATH")
    FEED_LIST_PATH = _require_env("SINGLERSS_FEED_LIST_PATH")
    FEED_OUT_TYPE = _require_env("SINGLERSS_FEED_OUT_TYPE")

    # Fail fast: reject a bad output type before fetching any feed.
    if FEED_OUT_TYPE not in _OUTPUT_TYPES:
        log.error("Unknown type of output preference, cannot run.")
        sys.exit(1)

    log.debug("Begin initialising variables..")
    init_feed()

    log.debug("Begin processing feeds...")
    main()

    if FEED_OUT_TYPE == "stdout":
        log.debug("stdout output specified, outputting to stdout.")
        # Write the encoded bytes directly so the result does not depend on
        # the locale's stdout encoding.
        sys.stdout.buffer.write(fg.rss_str())
        sys.stdout.buffer.write(b"\n")
    else:
        log.debug("File output specified, outputting to specified file..")
        fg.rss_file(FEED_OUT_PATH)