forked from varia/multifeeder
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
168 lines
4.3 KiB
168 lines
4.3 KiB
import feedparser
|
|
from simpledatabase import SimpleDatabase
|
|
import json
|
|
from datetime import date, timedelta
|
|
import pypandoc
|
|
import re
|
|
|
|
# Month abbreviations as used in RFC 822 style dates ("Tue, 05 Mar 2024 ...").
_MONTH_NUMBERS = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}


def _feed_details(parsed):
    """ Collect title, link, rss and description of a parsed feed.

    Every value that the feed does not provide is stored as "" so that
    the resulting record always has the same four keys.
    """
    try:
        # The base URL of the first entry's title is used as the feed's RSS address.
        rss = parsed.entries[0].title_detail.base
    except (IndexError, KeyError, AttributeError):
        rss = ""

    return {
        'title': parsed.feed.get('title', ""),
        'link': parsed.feed.get('link', ""),
        'rss': rss,
        'description': parsed.feed.get('description', ""),
    }


def _post_date(post):
    """ Return the day <post> was published as a date, or None if unknown.

    The structured 'published_parsed' value is preferred. When it is
    missing or unusable, the textual 'published' value is read as
    "<weekday>, <day> <month> <year> <time> [<timezone>]".
    """
    try:
        year, month, day = post['published_parsed'][:3]
        return date(year, month, day)
    except (KeyError, TypeError, ValueError):
        pass  # fall through to the textual date

    tokens = (post.get('published') or '').split()
    if len(tokens) < 4:
        return None

    day, month, year = tokens[1:4]
    try:
        # Accept "Mar", "March" and "MAR" as well as a numeric month.
        month_number = _MONTH_NUMBERS.get(month[:3].title()) or int(month)
        return date(int(year), month_number, int(day))
    except ValueError:
        return None


def update():
    """ Update all feeds

    Reads one feed URL per line from 'feeds.txt', parses every feed and
    writes the result to the database as a dictionary with two keys:

    'feeds'            -- details of each feed, keyed by the (string)
                          index of its line in 'feeds.txt'
    'all_posts_sorted' -- posts grouped in lists by publication day,
                          keyed by the day as 'YYYY-MM-DD'

    Blank lines in 'feeds.txt' are ignored. Posts without a usable
    publication date are reported on stdout and left out.
    """
    with open('feeds.txt') as feeds_file:
        feeds = feeds_file.readlines()
    db = SimpleDatabase('feeds.json', 'feeds.log')

    tmp = {'feeds': {}, 'all_posts_sorted': {}}

    for index, line in enumerate(feeds):
        url = line.strip()  # readlines() keeps the trailing newline
        if not url:
            continue

        parsed = feedparser.parse(url)
        if not parsed:
            continue

        details = _feed_details(parsed)
        tmp['feeds'][str(index)] = details

        for post in parsed.entries:
            post_date = _post_date(post)
            if post_date is None:
                print(f"Skipping post without a usable date: { post.get('link', '') }")
                continue

            # Every post carries its own copy of the feed details.
            post['feed_details'] = dict(details)
            tmp['all_posts_sorted'].setdefault(str(post_date), []).append(post)

    db.update(tmp)
|
|
|
|
def load():
    """ Open the feeds database ('feeds.json', logging to 'feeds.log') """
    return SimpleDatabase('feeds.json', 'feeds.log')
|
|
|
|
def latest(num):
    """ Collect the <num> latest published posts

    <num> may be an integer or a string holding one. Days are visited
    from newest to oldest and the posts of a day are taken in the order
    in which they are stored. Returns a list of at most <num> posts.
    """
    limit = int(num)
    posts_by_day = load()['all_posts_sorted']
    feed = []

    # The keys are 'YYYY-MM-DD' strings, so sorting them as text sorts them by date.
    for day in sorted(posts_by_day.keys(), reverse=True):
        for post in posts_by_day[day]:
            if len(feed) >= limit:
                # Stop completely instead of scanning the remaining days.
                return feed
            feed.append(post)

    return feed
|
|
|
|
def today():
    """ Collect posts from today

    Returns the list of all stored posts whose day key equals the
    current local date.
    """
    db = load()
    current_day = date.today()
    todays_posts = []

    for date_str, posts in db['all_posts_sorted'].items():
        # Keys look like 'YYYY-MM-DD'.
        parts = date_str.split('-')
        published_on = date(int(parts[0]), int(parts[1]), int(parts[2]))

        # Only keep the posts that are published today
        if published_on != current_day:
            continue
        todays_posts.extend(posts)

    return todays_posts
|
|
|
|
def past(days):
    """ Collect posts from a number of past <days>

    <days> may be an integer or a string holding one. A post is kept
    when its day key is later than today minus <days>. The collected
    posts are returned in reversed collection order.
    """
    db = load()
    cutoff = date.today() - timedelta(int(days))
    recent = []

    for date_str, posts in db['all_posts_sorted'].items():
        # Keys look like 'YYYY-MM-DD'.
        parts = date_str.split('-')
        published_on = date(int(parts[0]), int(parts[1]), int(parts[2]))

        if published_on > cutoff:
            recent.extend(posts)

    return recent[::-1]
|
|
|
|
def md(feed):
    """ Render the posts in <feed> as one Markdown document

    For every post this emits a horizontal rule, the title as a
    heading with the class 'post_title', a small table holding the
    publication date and the link, and the summary converted from HTML
    to Markdown. Links to PDF enclosures are appended to the summary.

    Returns the Markdown as a single string ('' for an empty feed).
    """
    md_parts = []

    for post in feed:
        post_content = pypandoc.convert_text(
            post['summary'], 'md', format='html'
        )

        # Posts may come without links, and enclosures without a declared type.
        for link in post.get('links') or []:
            if link.get('rel') == 'enclosure' and 'pdf' in (link.get('type') or ''):
                post_content += f"\n<{ link['href'] }>\n"

        # remove all ------ lines from varia website posts
        post_content = re.sub(r'\n.*(-)\1{5,}.*\n', "", post_content)

        # The second table column is made as wide as the link plus its padding.
        len_link = len(post['link']) + 4
        len_line_dash = len_link * '-'
        len_line_space = len_link * ' '
        # A negative count simply yields no padding at all.
        len_date_space = (len_link - len(post['published']) - 2) * ' '

        md_parts.append("------------------------- \n\n")
        md_parts.append(f"# { post['title'] }" + "{.post_title} \n\n")
        md_parts.append(
            f"| |{ len_line_space }|\n"
            f"|-----------:|{ len_line_dash }|\n"
            f"| **posted** | { post['published'] }{ len_date_space } |\n"
            f"| **from** | <{ post['link'] }> |\n"
            "\n"
        )
        md_parts.append(f"{ post_content } \n\n")

    return "".join(md_parts)
|