# forked from varia/multifeeder
import feedparser
|
|
from simpledatabase import SimpleDatabase
|
|
import json
|
|
from datetime import date, timedelta
|
|
import pypandoc
|
|
import re
|
|
|
|
def update():
    """Update all feeds.

    Reads one feed URL per line from feeds.txt, parses each with
    feedparser, and persists two structures into feeds.json via
    SimpleDatabase:

    - 'feeds': per-feed metadata (title, link, rss, description),
      keyed by the feed's position in feeds.txt as a string.
    - 'all_posts_sorted': posts grouped by ISO publication date
      ('YYYY-MM-DD'), each post annotated with its feed's metadata
      under 'feed_details'.
    """
    # 'with' guarantees the handle is closed (the original leaked it).
    with open('feeds.txt') as handle:
        feeds = handle.readlines()

    db = SimpleDatabase('feeds.json', 'feeds.log')

    tmp = {'feeds': {}, 'all_posts_sorted': {}}

    for x, feed in enumerate(feeds):
        url = feed.strip()
        if not url:
            continue  # tolerate blank lines in feeds.txt

        parsed = feedparser.parse(url)
        # feedparser.parse always returns a truthy result object, so the
        # old 'if parsed:' guard never filtered anything. An empty
        # 'entries' list is the practical signal that the fetch failed —
        # it also protects the 'entries[0]' access below from IndexError.
        if not parsed.entries:
            continue

        x = str(x)  # JSON object keys must be strings

        # FeedParserDict is a dict subclass: .get() with a default
        # replaces the AttributeError-prone direct attribute access for
        # fields that remote feeds routinely omit.
        feed_details = {
            'title': parsed.feed.get('title', ''),
            'link': parsed.feed.get('link', ''),
            'rss': parsed.entries[0].title_detail.base,
            'description': parsed.feed.get('description', ''),
        }
        tmp['feeds'][x] = feed_details

        for post in parsed.entries:
            published = post.get('published_parsed')
            if not published:
                continue  # no parsable date: the post cannot be grouped

            post_date = str(date(published[0], published[1], published[2]))
            tmp['all_posts_sorted'].setdefault(post_date, [])

            # Attach the owning feed's metadata to each post (copy, so
            # posts don't share one mutable dict).
            post['feed_details'] = dict(feed_details)
            tmp['all_posts_sorted'][post_date].append(post)

    db.update(tmp)
|
|
|
|
def load():
    """Open the feed database backed by feeds.json and return it."""
    return SimpleDatabase('feeds.json', 'feeds.log')
|
|
|
|
def latest(num):
    """ Collect the <num> latest published posts """
    db = load()
    limit = int(num)
    feed = []

    # Newest dates first; ISO 'YYYY-MM-DD' keys sort chronologically
    # as plain strings.
    for day in sorted(db['all_posts_sorted'], reverse=True):
        remaining = limit - len(feed)
        if remaining <= 0:
            # The original's 'break' only exited the inner loop, so it
            # kept re-scanning every remaining date after the feed was
            # already full. Stop outright instead.
            break
        feed.extend(db['all_posts_sorted'][day][:remaining])

    return feed
|
|
|
|
def today():
    """ Collect posts from today """
    db = load()
    # Don't shadow the function's own name with the local variable.
    current_day = date.today()
    feed = []

    for date_str, posts in db['all_posts_sorted'].items():
        # Keys are ISO 'YYYY-MM-DD' strings written by update(), so
        # fromisoformat replaces the manual split('-') parsing.
        if date.fromisoformat(date_str) == current_day:
            feed.extend(posts)

    return feed
|
|
|
|
def past(days):
    """ Collect posts from a number of past <days> """
    db = load()
    # Keyword argument makes the unit explicit; int() keeps accepting
    # string input as before.
    cutoff = date.today() - timedelta(days=int(days))
    feed = []

    for date_str, posts in db['all_posts_sorted'].items():
        # Keys are ISO 'YYYY-MM-DD' strings written by update().
        # Strictly greater than the cutoff, as in the original: a post
        # exactly <days> days old is excluded.
        if date.fromisoformat(date_str) > cutoff:
            feed.extend(posts)

    feed.reverse()
    return feed
|
|
|
|
def md(feed):
    """Render a list of posts as one Markdown string.

    Each post gets a divider, a title heading, a small metadata table
    (publication date and source link) and the post body converted
    from HTML to Markdown via pandoc. PDF enclosure links, when
    present, are appended to the body.
    """
    md_feed = ''
    for post in feed:

        post_content = pypandoc.convert_text(
            post['summary'], 'md', format='html'
        )
        # .get() throughout: entries may lack 'links' entirely, and
        # enclosure dicts don't always carry 'rel'/'type' keys — the
        # direct subscripting raised KeyError on such feeds.
        for link in post.get('links', []):
            if link.get('rel') == 'enclosure':
                if 'pdf' in link.get('type', ''):
                    post_content += f"\n<{ link['href'] }>\n"
        post_content = re.sub(r'\n.*(-)\1{5,}.*\n', "", post_content) # remove all ------ lines from varia website posts

        # Column widths so the metadata table lines up with the link.
        len_link = len(post['link']) + 4
        len_line_dash = len_link * '-'
        len_line_space = len_link * ' '
        len_date_space = (len_link - len(post['published']) - 2 ) * ' '

        md_feed += "------------------------- \n\n"
        md_feed += f"# {  post['title'] }"[0:2] + f"{ post['title'] }" + "{.post_title} \n\n" if False else f"# { post['title'] }" + "{.post_title} \n\n"
        md_feed += f"""| |{ len_line_space }|
|-----------:|{ len_line_dash }|
| **posted** | { post['published'] }{ len_date_space } |
| **from** | <{ post['link'] }> |

"""
        md_feed += f"{ post_content } \n\n"

    return md_feed