From b8504e5254a1ed4f2505f5617fedd8cb7f5c3bda Mon Sep 17 00:00:00 2001
From: rra
Date: Wed, 25 Aug 2021 21:50:44 +0200
Subject: [PATCH] docs, url parsing regexes and a plan for tomorrow

---
 event_feed.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/event_feed.py b/event_feed.py
index 5905e80..0be6e6f 100644
--- a/event_feed.py
+++ b/event_feed.py
@@ -29,13 +29,26 @@ template = env.get_template('event_template.md')
 existing_posts = os.listdir(output_dir)
 
 def findURLs(string):
-
+    """
+    return all URLs in a given string
+    """
     regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
     url = re.findall(regex,string)
     return [x[0] for x in url]
 
+def find_imageURLS(string):
+    """
+    return all image URLS in a given string
+    """
+    regex = r"(?:http\:|https\:)?\/\/.*?\.(?:png|jpg|jpeg|gif|svg)"
+
+    img_urls = re.findall(regex, string, flags=re.IGNORECASE)
+    return img_urls
+
 def create_metadata(event):
-    #construct a formatted dict of event metadata for in a post
+    """
+    construct a formatted dict of event metadata for use as frontmatter for HUGO post
+    """
 
     if event.location:
         location_urls = findURLs(event.location)
@@ -57,9 +70,16 @@ def create_metadata(event):
         'uid': event.uid
     }
 
+    #TODO: 1 find image urls in description
+    #      2 download them to hugo post
+    #      3 replace image url with local path
+
     return event_metadata
 
 def localize_time(date):
+    """
+    Turn a given date into various timezones
+    """
     # Dates need to be displayed for the various TZs
     # takes arrow objects
 
@@ -104,6 +124,9 @@ def create_event_post(post_dir, event):
 
 
 def update_event_post(post_dir, event):
+    """
+    Update a post based on the VCARD event 'created' field which changes when updated
+    """
     if os.path.exists(post_dir):
         old_timestamp = open(os.path.join(post_dir,'.timestamp')).read()
         if event.created > arrow.get(old_timestamp):