Multifeeding RSS streams into points of access.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

190 lines
5.6 KiB

import feedparser
from simpledatabase import SimpleDatabase
import json
from datetime import date, timedelta
import pypandoc
import re
# English month abbreviations recognised by the textual fallback in _post_date().
_MONTH_NUMBERS = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
}


def _feed_details(parsed):
    """Return title/link/rss/description of a parsed feed, "" for missing ones."""
    try:
        rss = parsed.entries[0].title_detail.base
    except (AttributeError, IndexError, KeyError):
        # No entries at all, or the first entry carries no title detail.
        rss = ""
    return {
        "title": getattr(parsed.feed, "title", ""),
        "link": getattr(parsed.feed, "link", ""),
        "rss": rss,
        "description": getattr(parsed.feed, "description", ""),
    }


def _post_date(post):
    """Return the publication day of a feed entry as a datetime.date.

    The structured "published_parsed" value is preferred. When it is missing
    or unusable, the raw "published" string is split on whitespace and read as
    "<weekday> <day> <month> <year> ..." (for example
    "Mon, 01 Jan 2024 10:00:00"); tokens after the year are ignored.

    Raises:
        KeyError: the entry has no "published" value to fall back on.
        ValueError: the "published" string does not have the expected layout.
    """
    try:
        parsed_time = post["published_parsed"]
        return date(parsed_time[0], parsed_time[1], parsed_time[2])
    except (KeyError, TypeError, IndexError, ValueError):
        # Fall through to the textual representation below.
        pass
    _weekday, day, month, year, *_rest = post["published"].split()
    # Unknown month tokens are passed to int() unchanged, so a numeric month works.
    month = _MONTH_NUMBERS.get(month, month)
    return date(int(year), int(month), int(day))


def update():
    """Update all feeds.

    Reads one feed address per line from "feeds.txt", parses each with
    feedparser and builds a dictionary with two keys:

    * "feeds": feed details (title, link, rss, description) keyed by the
      line index of the feed, as a string.
    * "all_posts_sorted": lists of posts keyed by publication day
      (str(date), i.e. "YYYY-MM-DD"); every post gets a "feed_details"
      entry describing the feed it came from.

    The dictionary is then handed to the SimpleDatabase opened on
    "feeds.json" / "feeds.log" via its update() method.

    Blank lines in "feeds.txt" are skipped; the remaining feeds keep the
    index of the line they are on.

    Raises:
        OSError: "feeds.txt" cannot be read.
        KeyError, ValueError: a post has no usable publication date.
    """
    with open("feeds.txt") as feeds_file:
        feeds = feeds_file.readlines()
    db = SimpleDatabase("feeds.json", "feeds.log")

    tmp = {"feeds": {}, "all_posts_sorted": {}}

    for index, line in enumerate(feeds):
        url = line.strip()  # drop the trailing newline left by readlines()
        if not url:
            continue
        parsed = feedparser.parse(url)
        if not parsed:
            continue

        details = _feed_details(parsed)
        tmp["feeds"][str(index)] = details

        for post in parsed.entries:
            # Trace every processed entry on stdout.
            print(post)
            day_key = str(_post_date(post))
            # Reuse the details computed above so a feed without a title,
            # link or description cannot abort the run here.
            post["feed_details"] = dict(details)
            tmp["all_posts_sorted"].setdefault(day_key, []).append(post)

    db.update(tmp)
def load():
    """Return a SimpleDatabase constructed from "feeds.json" and "feeds.log"."""
    return SimpleDatabase("feeds.json", "feeds.log")
def latest(num):
    """Collect the <num> latest published posts.

    Date buckets are visited from the newest key to the oldest; the keys of
    "all_posts_sorted" are compared as strings. Posts inside one bucket keep
    their stored order.

    Args:
        num: Maximum number of posts to return; any value accepted by int().

    Returns:
        A list with at most <num> posts; empty when <num> is zero or negative.

    Raises:
        ValueError, TypeError: <num> cannot be converted with int().
    """
    db = load()
    limit = int(num)
    feed = []
    if limit <= 0:
        return feed

    buckets = db["all_posts_sorted"]
    for day in sorted(buckets, reverse=True):
        for post in buckets[day]:
            feed.append(post)
            if len(feed) >= limit:
                # Enough posts collected: stop without visiting older buckets.
                return feed
    return feed
def today():
    """Return the stored posts whose date key equals date.today().

    Every key of "all_posts_sorted" is split on "-" and its first three
    fields are read as year, month and day.
    """
    db = load()
    current_day = date.today()
    todays_posts = []
    for stamp, bucket in db["all_posts_sorted"].items():
        fields = stamp.split("-")
        bucket_day = date(int(fields[0]), int(fields[1]), int(fields[2]))
        if bucket_day != current_day:
            continue
        # This bucket was published today: take all of its posts.
        todays_posts.extend(bucket)
    return todays_posts
def past(days):
    """Return the posts dated strictly after (today - <days> days).

    <days> is converted with int(). Posts are gathered in the iteration
    order of "all_posts_sorted" and the gathered list is returned reversed.
    """
    db = load()
    cutoff = date.today() - timedelta(int(days))
    collected = []
    for stamp, bucket in db["all_posts_sorted"].items():
        fields = stamp.split("-")
        if date(int(fields[0]), int(fields[1]), int(fields[2])) > cutoff:
            collected.extend(bucket)
    # NOTE(review): the reversal follows storage order, not date order;
    # confirm whether callers expect newest-first output.
    return list(reversed(collected))
def md(feed):
    """Render a list of posts as a single markdown string.

    For each post the HTML "summary" is converted to markdown with pypandoc,
    PDF enclosure links are appended, lines containing a run of six or more
    dashes are removed, and the result is preceded by a heading plus a small
    table showing the "published" value and the post "link".

    Returns "" when <feed> is empty.
    """
    chunks = []
    for post in feed:
        body = pypandoc.convert_text(post["summary"], "md", format="html")

        if post["links"]:
            for link in post["links"]:
                # Only PDF enclosures are listed below the converted summary.
                if link["rel"] == "enclosure" and "pdf" in link["type"]:
                    body += f"\n<{ link['href'] }>\n"

        # remove all ------ lines from varia website posts
        body = re.sub(r"\n.*(-)\1{5,}.*\n", "", body)

        # The table's second column is sized to the link plus "<", ">" and
        # one space of padding on each side.
        width = len(post["link"]) + 4
        dash_rule = "-" * width
        blank_rule = " " * width
        date_padding = " " * (width - len(post["published"]) - 2)

        chunks.append("------------------------- \n\n")
        chunks.append(f"# { post['title'] }" + "{.post_title} \n\n")
        chunks.append(f"""| |{ blank_rule }|
|-----------:|{ dash_rule }|
| **posted** | { post['published'] }{ date_padding } |
| **from** | <{ post['link'] }> |
""")
        chunks.append(f"{ body } \n\n")
    return "".join(chunks)