Only parse RSS feeds once every 10 minutes and store the results in a JSON file

This commit is contained in:
crunk 2023-07-15 21:50:19 +02:00
parent 9fe56e9507
commit f6e9b8f4ad
7 changed files with 119 additions and 31 deletions

42
app.py
View File

@ -1,9 +1,11 @@
import os import os
from flask import Flask import json
import flask_apscheduler import flask_apscheduler
# from flask_sqlalchemy import SQLAlchemy import tomli
# db = SQLAlchemy() from flask import Flask
# migrate = Migrate()
from column import Column
from simplejsonstorage import SimpleJsonStorage
def create_app(): def create_app():
@ -12,12 +14,32 @@ def create_app():
scheduler.api_enabled = False scheduler.api_enabled = False
scheduler.init_app(APP) scheduler.init_app(APP)
scheduler.start() scheduler.start()
# APP.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///data/crunk_data.db" json_file = SimpleJsonStorage(
# APP.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True os.path.join("data", "feeds.json"), os.path.join("data", "feeds.log")
# db.init_app(APP) )
# migrate.init_app(APP, db, render_as_batch=True) update_feeds(json_file)
@scheduler.task("interval", id="update", minutes=10) @scheduler.task("interval", id="update", minutes=10)
def update(): def update():
print("Updating the RSS feeds!") update_feeds(json_file)
return APP return APP
def update_feeds(json_file):
    """Rebuild every column listed in ``columns.toml`` and store the result.

    Each entry of the ``column`` table in ``columns.toml`` becomes a
    ``Column`` built from its ``title`` and ``urls``. The optional ``limit``
    and ``sort_order`` keys are applied when present, the feed content is
    loaded, and the column's attribute dictionary is stored under its title.

    Args:
        json_file: Storage object whose ``update`` method receives the
            ``{title: column attributes}`` mapping; ``create_app`` passes a
            ``SimpleJsonStorage`` here.
    """
    with open("columns.toml", "rb") as toml_handle:
        config = tomli.load(toml_handle)

    feeds_by_title = {}
    for spec in config["column"]:
        name = spec["title"]
        feed_column = Column(name, spec["urls"])
        # Both settings are optional in the TOML file.
        if "limit" in spec:
            feed_column.set_limit(spec["limit"])
        if "sort_order" in spec:
            feed_column.set_sort_order(spec["sort_order"])
        feed_column.load_content_from_feeds()
        # Store the plain attribute dict rather than the Column object.
        feeds_by_title[name] = vars(feed_column)

    json_file.update(feeds_by_title)

View File

@ -1,16 +1,17 @@
import random import random
import json
from datetime import datetime from datetime import datetime
from time import mktime from time import mktime
from parse_rss_feeds import parse_rss_feeds from parse_rss_feeds import parse_rss_feeds
class Column: class Column(object):
def __init__(self, title, urls): def __init__(self, title, urls):
self.title = title self.title = title
self.urls = urls self.urls = urls
self.entries = None
self.sort_order = None
self.limit = None self.limit = None
self.sort_order = None
self.entries = None
def set_sort_order(self, sort_order): def set_sort_order(self, sort_order):
self.sort_order = sort_order self.sort_order = sort_order
@ -18,6 +19,12 @@ class Column:
def set_limit(self, limit): def set_limit(self, limit):
self.limit = limit self.limit = limit
def set_entries(self, entries):
self.entries = entries
def set_title(self, title):
self.title = title
def _sort_by_order(self): def _sort_by_order(self):
entrylist = list(self.entries.items()) entrylist = list(self.entries.items())
if self.sort_order.lower() == "reverse": if self.sort_order.lower() == "reverse":

1
data/feeds.json Normal file
View File

@ -0,0 +1 @@
{}

0
data/feeds.log Normal file
View File

View File

@ -2,7 +2,7 @@ from feedparser import parse
import random import random
def _parse_single_rss_feed(url, entries): def parse_single_rss_feed(url, entries):
feed = parse(url) feed = parse(url)
entrylength = len(entries) entrylength = len(entries)
for entrynumber, entry in enumerate(feed.entries): for entrynumber, entry in enumerate(feed.entries):
@ -21,5 +21,5 @@ def _parse_single_rss_feed(url, entries):
def parse_rss_feeds(urls): def parse_rss_feeds(urls):
entries = {} entries = {}
for url in urls: for url in urls:
entries = _parse_single_rss_feed(url, entries) entries = parse_single_rss_feed(url, entries)
return entries return entries

60
simplejsonstorage.py Normal file
View File

@ -0,0 +1,60 @@
from os import environ, mkdir
from os.path import exists
from pathlib import Path
from logging import DEBUG, INFO, basicConfig, getLogger
from json import dumps, loads
class SimpleJsonStorage(dict):
    """A dictionary that mirrors its contents to a JSON file.

    Item assignment, item deletion and ``update`` each rewrite the whole
    file. Other mutating ``dict`` methods (``pop``, ``setdefault``,
    ``clear``, ...) are not overridden and therefore do not save. It is
    optimised for ease of hacking and accessibility and not for performance
    or efficiency.

    Written by decentral1se, as part of:
    https://git.vvvvvvaria.org/decentral1se/xbotlib/src/branch/main/xbotlib.py
    """

    def __init__(self, filename, log, *args, **kwargs):
        """Load ``filename`` if it exists, merge in initial data and save.

        Args:
            filename: Path of the JSON file backing this dictionary.
            log: Accepted for interface compatibility; it is not used, and
                messages go to the module logger instead.
            *args: Initial data, forwarded to ``update``.
            **kwargs: Initial data, forwarded to ``update``.

        Raises:
            SystemExit: If the file cannot be read, parsed or written.
        """
        super().__init__()
        self.filename = Path(filename).absolute()
        self.log = getLogger(__name__)
        self._loads()
        # ``update`` always saves, so a missing file is created here.
        self.update(*args, **kwargs)

    def _loads(self):
        """Read the backing file into the dictionary, if the file exists.

        Raises:
            SystemExit: If the file cannot be read or parsed.
        """
        if not exists(self.filename):
            return
        try:
            with open(self.filename, "r", encoding="utf-8") as handle:
                # Fill the dict directly: loading must not rewrite the file
                # that is being read.
                super().update(loads(handle.read()))
        except Exception as exception:
            message = f"Loading file storage failed: {exception}"
            self.log.error(message, exc_info=exception)
            # Same effect as ``exit(1)`` without relying on the ``site``
            # module having installed that helper.
            raise SystemExit(1) from exception

    def _dumps(self):
        """Write the whole dictionary to the backing file as JSON.

        Raises:
            SystemExit: If the data cannot be serialised or written.
        """
        try:
            with open(self.filename, "w", encoding="utf-8") as handle:
                handle.write(dumps(self, indent=4, sort_keys=True))
        except Exception as exception:
            message = f"Saving file storage failed: {exception}"
            self.log.error(message, exc_info=exception)
            raise SystemExit(1) from exception

    def __setitem__(self, key, val):
        """Store ``val`` under ``key`` and save the file."""
        super().__setitem__(key, val)
        self._dumps()

    def __delitem__(self, key):
        """Delete ``key`` and save the file."""
        super().__delitem__(key)
        self._dumps()

    def update(self, *args, **kwargs):
        """Merge the given data into the dictionary and save the file once.

        Accepts the same arguments as ``dict.update``. The file is written
        exactly once per call, even when no data is passed.
        """
        # Bypass ``__setitem__`` so the file is not rewritten for every key.
        super().update(*args, **kwargs)
        self._dumps()

View File

@ -1,6 +1,8 @@
import tomli import tomli
import os
import json
from flask import render_template from flask import render_template
from simplejsonstorage import SimpleJsonStorage
from app import create_app from app import create_app
from column import Column from column import Column
@ -9,26 +11,22 @@ APP = create_app()
@APP.route("/") @APP.route("/")
def index(): def index():
with open("columns.toml", "rb") as f: json_file = load_json_file()
column_dict = tomli.load(f)
columns_file = column_dict["column"]
columns = [] columns = []
for column_from_file in columns_file: for key, value in json_file.items():
urls = column_from_file["urls"] print(key)
title = column_from_file["title"] column = Column(value["title"], value["urls"])
column = Column(title=title, urls=urls) column.set_entries(value["entries"])
if "limit" in column_from_file:
column.set_limit(column_from_file["limit"])
if "sort_order" in column_from_file:
column.set_sort_order(column_from_file["sort_order"])
column.load_content_from_feeds()
columns.append(column) columns.append(column)
return render_template("index.html", columns=columns) return render_template("index.html", columns=columns)
def load_json_file():
    """Open the stored feed data as a ``SimpleJsonStorage``.

    Returns:
        A ``SimpleJsonStorage`` built from ``data/feeds.json``, with
        ``data/feeds.log`` passed as its log argument.
    """
    feeds_path = os.path.join("data", "feeds.json")
    log_path = os.path.join("data", "feeds.log")
    return SimpleJsonStorage(feeds_path, log_path)
if __name__ == "__main__": if __name__ == "__main__":
APP.debug = True APP.debug = True
APP.run(port=5000) APP.run(port=5000)