import feedparser
from simpledatabase import SimpleDatabase
import json
from datetime import date, timedelta
import pypandoc
import re

# Abbreviated English month names, used by the textual date fallback.
_MONTHS = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

# A line containing a run of six or more dashes, together with the newlines
# around it (used to remove all ------ lines from varia website posts).
_DASH_LINE = re.compile(r'\n.*(-)\1{5,}.*\n')


def _feed_details(parsed):
    """Return the title, link, rss and description of a parsed feed.

    Any value the feed does not provide is stored as an empty string.
    """
    feed_info = parsed.feed
    try:
        rss = parsed.entries[0].title_detail.base
    except (IndexError, AttributeError, KeyError):
        # No entries, or the first entry carries no title detail.
        rss = ""
    return {
        'title': getattr(feed_info, 'title', ""),
        'link': getattr(feed_info, 'link', ""),
        'rss': rss,
        'description': getattr(feed_info, 'description', ""),
    }


def _post_date(post):
    """Return the publication date of *post*, or None if it is unreadable.

    The structured ``published_parsed`` value is preferred; when it is
    missing or unusable, the ``published`` string is parsed as
    "<weekday> <day> <month> <year> ..." instead.
    """
    try:
        year, month, day = post['published_parsed'][:3]
        return date(year, month, day)
    except (KeyError, TypeError, ValueError):
        pass

    try:
        # Tokens after the year (time, timezone) are ignored.
        _weekday, day, month, year, *_rest = post['published'].split()
        month_number = _MONTHS[month] if month in _MONTHS else int(month)
        return date(int(year), month_number, int(day))
    except (KeyError, AttributeError, ValueError):
        return None


def _date_from_key(date_str):
    """Convert a "YYYY-MM-DD" key of ``all_posts_sorted`` into a date."""
    year, month, day = (int(part) for part in date_str.split('-')[:3])
    return date(year, month, day)


def update():
    """ Update all feeds

    Reads one feed URL per line from feeds.txt, parses each feed and passes
    the collected feed details and posts (grouped by publication date) to
    the database's ``update``.
    """
    with open('feeds.txt') as feeds_file:
        feed_lines = feeds_file.readlines()
    db = SimpleDatabase('feeds.json', 'feeds.log')

    tmp = {'feeds': {}, 'all_posts_sorted': {}}

    for index, line in enumerate(feed_lines):
        url = line.strip()
        if not url:
            # Blank line in feeds.txt: nothing to fetch.
            continue

        parsed = feedparser.parse(url)
        if not parsed:
            continue

        details = _feed_details(parsed)
        # Feeds are keyed by their line position in feeds.txt.
        tmp['feeds'][str(index)] = details

        for post in parsed.entries:
            post_date = _post_date(post)
            if post_date is None:
                # One undated post should not abort the whole update.
                print(f"Skipping post without a readable date: { post.get('link', '') }")
                continue

            # Each post carries its own copy of the feed it came from.
            post['feed_details'] = dict(details)
            tmp['all_posts_sorted'].setdefault(str(post_date), []).append(post)

    db.update(tmp)


def load():
    """ Open the feeds database (feeds.json / feeds.log) and return it """
    db = SimpleDatabase('feeds.json', 'feeds.log')
    return db


def latest(num):
    """ Collect the latest published posts

    Returns at most ``int(num)`` posts, walking the dates from newest to
    oldest.
    """
    db = load()
    limit = int(num)
    feed = []
    if limit <= 0:
        return feed

    # Keys are ISO dates, so reverse string order is newest first.
    for day_key in sorted(db['all_posts_sorted'], reverse=True):
        for post in db['all_posts_sorted'][day_key]:
            feed.append(post)
            if len(feed) >= limit:
                # Enough posts collected: stop scanning older dates too.
                return feed
    return feed


def today():
    """ Collect posts from today """
    db = load()
    current_day = date.today()
    feed = []
    for date_str, posts in db['all_posts_sorted'].items():
        # Check if any posts are published today
        if _date_from_key(date_str) == current_day:
            feed.extend(posts)
    return feed


def past(days):
    """ Collect posts from a number of past days

    Returns the posts dated after (today - ``int(days)`` days), in the
    reverse of the order in which the database yields them.
    """
    db = load()
    point_in_the_past = date.today() - timedelta(int(days))
    feed = []
    for date_str, posts in db['all_posts_sorted'].items():
        if _date_from_key(date_str) > point_in_the_past:
            feed.extend(posts)
    feed.reverse()
    return feed


def md(feed):
    """ Render a list of posts as one markdown document

    Each post becomes a separator line, a title heading, a small table with
    the publication date and link, and the post summary converted from HTML
    to markdown. Links to PDF enclosures are appended to the summary.
    """
    sections = []
    for post in feed:
        post_content = pypandoc.convert_text(
            post['summary'],
            'md',
            format='html'
        )

        # Posts without a 'links' entry simply have no enclosures.
        for link in post.get('links') or []:
            if link.get('rel') == 'enclosure' and 'pdf' in (link.get('type') or ''):
                post_content += f"\n<{ link['href'] }>\n"

        post_content = _DASH_LINE.sub("", post_content)

        # Width of the second table column: " <link> " is len(link) + 4.
        len_link = len(post['link']) + 4
        len_line_dash = len_link * '-'
        len_line_space = len_link * ' '
        # Pads the date cell to the same width as the link cell.
        len_date_space = (len_link - len(post['published']) - 2) * ' '

        table = "\n".join([
            f"|            |{ len_line_space }|",
            f"|-----------:|{ len_line_dash }|",
            f"| **posted** | { post['published'] }{ len_date_space } |",
            f"|   **from** | <{ post['link'] }> |",
        ])

        sections.append("------------------------- \n\n")
        sections.append(f"# { post['title'] }" + "{.post_title} \n\n")
        sections.append(table + "\n\n")
        sections.append(f"{ post_content } \n\n")

    return "".join(sections)