From 3f236e77ff06f8e626220c65242976336c5bc5fa Mon Sep 17 00:00:00 2001 From: manetta Date: Mon, 21 Feb 2022 17:47:04 +0100 Subject: [PATCH] adding markdown support --- feedtools.py | 61 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/feedtools.py b/feedtools.py index c3a4c58..d1fdd56 100644 --- a/feedtools.py +++ b/feedtools.py @@ -1,8 +1,9 @@ import feedparser from simpledatabase import SimpleDatabase import json -from datetime import datetime, date, timedelta -from backports.zoneinfo import ZoneInfo +from datetime import date, timedelta +import pypandoc +import re def update(): """ Update all feeds """ @@ -32,7 +33,7 @@ def update(): year = post['published_parsed'][0] month = post['published_parsed'][1] day = post['published_parsed'][2] - post_date = datetime(year, month, day, tzinfo=ZoneInfo("Europe/Amsterdam")) + post_date = date(year, month, day) if not str(post_date) in tmp['all_posts_sorted']: tmp['all_posts_sorted'][str(post_date)] = [] @@ -56,23 +57,23 @@ def latest(num): dates = [key for key in db['all_posts_sorted'].keys()] dates.sort(reverse=True) - request = [] + feed = [] for date in dates: posts = db['all_posts_sorted'][date] for post in posts: - if len(request) < int(num): - request.append(post) + if len(feed) < int(num): + feed.append(post) else: break - return request + return feed def today(): """ Collect posts from today """ db = load() today = date.today() - request = [] + feed = [] for date_str, posts in db['all_posts_sorted'].items(): year = int(date_str.split('-')[0]) @@ -83,15 +84,15 @@ def today(): # Check if any posts are published today if d == today: for post in posts: - request.append(post) + feed.append(post) - return request + return feed def past(days): """ Collect posts from a number of past """ db = load() point_in_the_past = date.today() - timedelta(int(days)) - request = [] + feed = [] for date_str, posts in db['all_posts_sorted'].items(): year = 
def md(feed):
    """Render a sequence of feed posts as a single Markdown string.

    For every post the HTML in ``post['summary']`` is converted to Markdown
    with ``pypandoc.convert_text``, links to PDF enclosures are appended as
    ``<url>`` autolinks, and lines containing six or more consecutive dashes
    are stripped from the converted text. The post is then emitted as a
    separator line, a title heading carrying the ``{.post_title}`` attribute,
    a two-row table (``posted`` / ``from``) and the converted content.

    Args:
        feed: iterable of post dicts. The keys ``summary``, ``title``,
            ``published`` and ``link`` are read for every post; ``links`` is
            optional and, when present, is a list of dicts from which
            ``rel``, ``type`` and ``href`` are read.

    Returns:
        The Markdown of all posts concatenated in input order, or an empty
        string when ``feed`` is empty.
    """
    sections = []
    for post in feed:
        post_content = pypandoc.convert_text(
            post['summary'], 'md', format='html'
        )

        # Use .get() so a post without 'links', or a link entry without
        # 'rel'/'type', is skipped instead of raising KeyError.
        for link in post.get('links') or []:
            is_enclosure = link.get('rel') == 'enclosure'
            if is_enclosure and 'pdf' in (link.get('type') or ''):
                post_content += f"\n<{ link['href'] }>\n"

        # remove all ------ lines from varia website posts
        post_content = re.sub(r'\n.*(-)\1{5,}.*\n', "", post_content)

        # Width of the second table column: the link plus "<", ">" and one
        # space of padding on each side.
        len_link = len(post['link']) + 4
        len_line_dash = len_link * '-'
        len_line_space = len_link * ' '
        # A negative count yields an empty string, so long dates simply get
        # no padding.
        len_date_space = (len_link - len(post['published']) - 2) * ' '

        table = (
            f"| |{ len_line_space }|\n"
            f"|-----------:|{ len_line_dash }|\n"
            f"| **posted** | { post['published'] }{ len_date_space } |\n"
            f"| **from** | <{ post['link'] }> |\n"
            "\n"
        )

        sections.append(
            "------------------------- \n\n"
            + f"# { post['title'] }" + "{.post_title} \n\n"
            + table
            + f"{ post_content } \n\n"
        )

    # Join once instead of growing a string with += on every post.
    return ''.join(sections)