multifeeder/feedtools.py

139 lines
3.6 KiB
Python
Raw Normal View History

2021-02-16 23:26:03 +01:00
import feedparser
from simpledatabase import SimpleDatabase
import json
2022-02-21 17:47:04 +01:00
from datetime import date, timedelta
import pypandoc
import re
2021-02-16 23:26:03 +01:00
def update():
    """Re-fetch every feed listed in feeds.txt and rebuild the database.

    Reads one feed URL per line from feeds.txt, parses each with
    feedparser, and writes two structures to the SimpleDatabase:
      - 'feeds': per-feed metadata (title, link, rss url, description),
        keyed by the feed's line index as a string
      - 'all_posts_sorted': posts grouped by ISO publication date
        ('YYYY-MM-DD'), each post annotated with its feed's metadata
        under 'feed_details'
    """
    # Fix: the original left the file handle open; use a context manager.
    with open('feeds.txt') as feeds_file:
        feeds = feeds_file.readlines()
    db = SimpleDatabase('feeds.json', 'feeds.log')
    tmp = {'feeds': {}, 'all_posts_sorted': {}}

    for index, feed_url in enumerate(feeds):
        parsed = feedparser.parse(feed_url)
        if not parsed:
            continue
        # Keys must be strings so the structure round-trips through JSON.
        key = str(index)
        details = {
            'title': parsed.feed.title if parsed.feed.title else "",
            'link': parsed.feed.link,
            # NOTE(review): rss url is taken from the first entry's
            # title_detail.base — assumes at least one entry exists.
            'rss': parsed.entries[0].title_detail.base,
            'description': parsed.feed.description,
        }
        tmp['feeds'][key] = details

        for post in parsed.entries:
            year, month, day = post['published_parsed'][:3]
            post_date = str(date(year, month, day))
            # Group posts under their ISO publication date.
            tmp['all_posts_sorted'].setdefault(post_date, [])
            post['feed_details'] = {
                'title': parsed.feed.title,
                'link': parsed.feed.link,
                'rss': parsed.entries[0].title_detail.base,
                'description': parsed.feed.description,
            }
            tmp['all_posts_sorted'][post_date].append(post)

    db.update(tmp)
def load():
    """Open and return the feeds database (feeds.json / feeds.log)."""
    return SimpleDatabase('feeds.json', 'feeds.log')
2021-02-16 23:26:03 +01:00
def latest(num):
    """Collect the <num> latest published posts.

    Walks the date buckets newest-first and gathers posts until the
    requested count is reached.

    num -- maximum number of posts to return (int or numeric string).
    Returns a list of post dicts, newest dates first.
    """
    db = load()
    # Hoist the conversion out of the loop (was re-evaluated per post).
    limit = int(num)
    dates = sorted(db['all_posts_sorted'].keys(), reverse=True)

    feed = []
    for day in dates:
        for post in db['all_posts_sorted'][day]:
            # Fix: the original `break` only left the inner loop, so the
            # outer loop kept scanning every remaining date after the
            # feed was already full. Return as soon as we have enough.
            if len(feed) == limit:
                return feed
            feed.append(post)
    return feed
2021-02-16 23:26:03 +01:00
def today():
    """Collect posts published today.

    Scans every 'YYYY-MM-DD' bucket in the database and returns the
    posts whose date equals the current day.
    """
    db = load()
    current = date.today()
    feed = []

    for date_str, posts in db['all_posts_sorted'].items():
        year, month, day = map(int, date_str.split('-'))
        # Only keep buckets dated today.
        if date(year, month, day) == current:
            feed.extend(posts)

    return feed
2021-02-16 23:26:03 +01:00
def past(days):
    """Collect posts from a number of past <days>.

    days -- how many days back to look (int or numeric string).
    Returns a list of post dicts, reversed from database order.
    """
    db = load()
    cutoff = date.today() - timedelta(int(days))
    feed = []

    for date_str, posts in db['all_posts_sorted'].items():
        year, month, day = map(int, date_str.split('-'))
        # Keep buckets strictly newer than the cutoff date.
        if date(year, month, day) > cutoff:
            feed.extend(posts)

    feed.reverse()
    return feed
def md(feed):
    """Render a list of posts as a single Markdown string.

    For each post: converts the HTML summary to Markdown via pandoc,
    appends any PDF enclosure links, strips long '------' separator
    lines, and emits a header table with the publication date and
    source link, sized to the link's width.
    """
    chunks = []
    for post in feed:
        body = pypandoc.convert_text(
            post['summary'], 'md', format='html'
        )
        # Surface PDF enclosures as bare links under the content.
        if post['links']:
            for link in post['links']:
                if link['rel'] == 'enclosure' and 'pdf' in link['type']:
                    body += f"\n<{link['href']}>\n"
        # remove all ------ lines from varia website posts
        body = re.sub(r'\n.*(-)\1{5,}.*\n', "", body)

        # Column width tracks the source link so the table lines up.
        width = len(post['link']) + 4
        dash_fill = width * '-'
        space_fill = width * ' '
        date_fill = (width - len(post['published']) - 2) * ' '

        chunks.append("------------------------- \n\n")
        # '{.post_title}' is a literal pandoc attribute, kept out of the f-string.
        chunks.append(f"# {post['title']}" + "{.post_title} \n\n")
        chunks.append(f"""| |{space_fill}|
|-----------:|{dash_fill}|
| **posted** | {post['published']}{date_fill} |
| **from** | <{post['link']}> |
""")
        chunks.append(f"{body} \n\n")
    return ''.join(chunks)