From f6e9b8f4ad7e81b482e8b65d15dba5f792c7524b Mon Sep 17 00:00:00 2001
From: crunk
Date: Sat, 15 Jul 2023 21:50:19 +0200
Subject: [PATCH] only parse rss feeds once every 10 minutes and store in json file

---
 app.py               | 42 +++++++++++++++++++++++--------
 column.py            | 13 +++++++---
 data/feeds.json      |  1 +
 data/feeds.log       |  0
 parse_rss_feeds.py   |  4 +--
 simplejsonstorage.py | 60 ++++++++++++++++++++++++++++++++++++++++++++
 start.py             | 30 +++++++++++-----------
 7 files changed, 119 insertions(+), 31 deletions(-)
 create mode 100644 data/feeds.json
 create mode 100644 data/feeds.log
 create mode 100644 simplejsonstorage.py

diff --git a/app.py b/app.py
index b135b42..b28eb05 100644
--- a/app.py
+++ b/app.py
@@ -1,9 +1,11 @@
 import os
-from flask import Flask
+import json
 import flask_apscheduler
-# from flask_sqlalchemy import SQLAlchemy
-# db = SQLAlchemy()
-# migrate = Migrate()
+import tomli
+from flask import Flask
+
+from column import Column
+from simplejsonstorage import SimpleJsonStorage
 
 
 def create_app():
@@ -12,12 +14,32 @@ def create_app():
     scheduler.api_enabled = False
     scheduler.init_app(APP)
     scheduler.start()
-    # APP.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///data/crunk_data.db"
-    # APP.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
-    # db.init_app(APP)
-    # migrate.init_app(APP, db, render_as_batch=True)
+    json_file = SimpleJsonStorage(
+        os.path.join("data", "feeds.json"), os.path.join("data", "feeds.log")
+    )
+    update_feeds(json_file)
+
 
     @scheduler.task("interval", id="update", minutes=10)
     def update():
-        print("Updating the RSS feeds!")
-
+        update_feeds(json_file)
+    return APP
+
+
+def update_feeds(json_file):
+    with open("columns.toml", "rb") as f:
+        column_dict = tomli.load(f)
+
+    columns_file = column_dict["column"]
+    columns = {}
+    for column_from_file in columns_file:
+        title = column_from_file["title"]
+        column = Column(title, column_from_file["urls"])
+
+        if "limit" in column_from_file:
+            column.set_limit(column_from_file["limit"])
+        if "sort_order" in column_from_file:
+            column.set_sort_order(column_from_file["sort_order"])
+        column.load_content_from_feeds()
+        columns[title] = column.__dict__
+    json_file.update(columns)
diff --git a/column.py b/column.py
index cd2fda1..9f67c7c 100644
--- a/column.py
+++ b/column.py
@@ -1,16 +1,17 @@
 import random
+import json
 from datetime import datetime
 from time import mktime
 from parse_rss_feeds import parse_rss_feeds
 
 
-class Column:
+class Column(object):
     def __init__(self, title, urls):
         self.title = title
         self.urls = urls
-        self.entries = None
-        self.sort_order = None
         self.limit = None
+        self.sort_order = None
+        self.entries = None
 
     def set_sort_order(self, sort_order):
         self.sort_order = sort_order
@@ -18,6 +19,12 @@ class Column:
     def set_limit(self, limit):
         self.limit = limit
 
+    def set_entries(self, entries):
+        self.entries = entries
+
+    def set_title(self, title):
+        self.title = title
+
     def _sort_by_order(self):
         entrylist = list(self.entries.items())
         if self.sort_order.lower() == "reverse":
diff --git a/data/feeds.json b/data/feeds.json
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/data/feeds.json
@@ -0,0 +1 @@
+{}
diff --git a/data/feeds.log b/data/feeds.log
new file mode 100644
index 0000000..e69de29
diff --git a/parse_rss_feeds.py b/parse_rss_feeds.py
index 6306883..a360d5e 100644
--- a/parse_rss_feeds.py
+++ b/parse_rss_feeds.py
@@ -2,7 +2,7 @@ from feedparser import parse
 import random
 
 
-def _parse_single_rss_feed(url, entries):
+def parse_single_rss_feed(url, entries):
     feed = parse(url)
     entrylength = len(entries)
     for entrynumber, entry in enumerate(feed.entries):
@@ -21,5 +21,5 @@ def _parse_single_rss_feed(url, entries):
 def parse_rss_feeds(urls):
     entries = {}
     for url in urls:
-        entries = _parse_single_rss_feed(url, entries)
+        entries = parse_single_rss_feed(url, entries)
     return entries
diff --git a/simplejsonstorage.py b/simplejsonstorage.py
new file mode 100644
index 0000000..68718d9
--- /dev/null
+++ b/simplejsonstorage.py
@@ -0,0 +1,60 @@
+from os import environ, mkdir
+from os.path import exists
+from pathlib import Path
+from logging import DEBUG, INFO, basicConfig, getLogger
+from json import dumps, loads
+
+class SimpleJsonStorage(dict):
+    """A simple json file.
+    It is a dictionary which saves to disk on all writes. It is optimised for
+    ease of hacking and accessibility and not for performance or efficiency.
+    Written by decentral1se, as part of:
+    https://git.vvvvvvaria.org/decentral1se/xbotlib/src/branch/main/xbotlib.py
+    """
+
+    def __init__(self, filename, log, *args, **kwargs):
+        """Initialise the object."""
+        self.filename = Path(filename).absolute()
+        self.log = getLogger(__name__)
+
+        self._loads()
+        self.update(*args, **kwargs)
+
+    def _loads(self):
+        """Load the file."""
+        if not exists(self.filename):
+            return
+
+        try:
+            with open(self.filename, "r") as handle:
+                self.update(loads(handle.read()))
+        except Exception as exception:
+            message = f"Loading file storage failed: {exception}"
+            self.log.error(message, exc_info=exception)
+            exit(1)
+
+    def _dumps(self):
+        """Save the file to disk."""
+        try:
+            with open(self.filename, "w") as handle:
+                handle.write(dumps(self, indent=4, sort_keys=True))
+        except Exception as exception:
+            message = f"Saving file storage failed: {exception}"
+            self.log.error(message, exc_info=exception)
+            exit(1)
+
+    def __setitem__(self, key, val):
+        """Write data to the file."""
+        super().__setitem__(key, val)
+        self._dumps()
+
+    def __delitem__(self, key):
+        """Remove data from the file."""
+        super().__delitem__(key)
+        self._dumps()
+
+    def update(self, *args, **kwargs):
+        """Update the file."""
+        for k, v in dict(*args, **kwargs).items():
+            self[k] = v
+        self._dumps()
diff --git a/start.py b/start.py
index e7c5170..b4b3155 100644
--- a/start.py
+++ b/start.py
@@ -1,6 +1,8 @@
 import tomli
+import os
+import json
 from flask import render_template
-
+from simplejsonstorage import SimpleJsonStorage
 from app import create_app
 from column import Column
 
@@ -9,26 +11,22 @@ APP = create_app()
 
 @APP.route("/")
 def index():
-    with open("columns.toml", "rb") as f:
-        column_dict = tomli.load(f)
-
-    columns_file = column_dict["column"]
+    json_file = load_json_file()
     columns = []
-    for column_from_file in columns_file:
-        urls = column_from_file["urls"]
-        title = column_from_file["title"]
-        column = Column(title=title, urls=urls)
-
-        if "limit" in column_from_file:
-            column.set_limit(column_from_file["limit"])
-        if "sort_order" in column_from_file:
-            column.set_sort_order(column_from_file["sort_order"])
-
-        column.load_content_from_feeds()
+    for key, value in json_file.items():
+        print(key)
+        column = Column(value["title"], value["urls"])
+        column.set_entries(value["entries"])
         columns.append(column)
     return render_template("index.html", columns=columns)
 
 
+def load_json_file():
+    json_file = SimpleJsonStorage(
+        os.path.join("data", "feeds.json"), os.path.join("data", "feeds.log")
+    )
+    return json_file
+
 if __name__ == "__main__":
     APP.debug = True
     APP.run(port=5000)
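
A quick usage sketch of the SimpleJsonStorage class introduced above (the example key and payload are illustrative only, not part of the patch): because __setitem__, __delitem__ and update() all call _dumps(), every write lands in data/feeds.json immediately, which is what lets start.py serve the cached feeds without parsing any RSS itself.

    from simplejsonstorage import SimpleJsonStorage

    # Same construction as in app.py and start.py; the second argument is the
    # log file path that the constructor accepts but does not currently use.
    store = SimpleJsonStorage("data/feeds.json", "data/feeds.log")

    # Hypothetical column payload, shaped like the column.__dict__ that
    # update_feeds() stores for each column title.
    store["example"] = {"title": "example", "urls": [], "entries": {}}

    # A fresh instance re-reads the file in _loads(), so another process
    # (e.g. the Flask view) sees the write made by the scheduler.
    print(SimpleJsonStorage("data/feeds.json", "data/feeds.log")["example"]["title"])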