docs, url parsing regexes and a plan for tomorrow

This commit is contained in:
rra 2021-08-25 21:50:44 +02:00
parent f3cdf56b85
commit b8504e5254

View File

@ -29,13 +29,26 @@ template = env.get_template('event_template.md')
existing_posts = os.listdir(output_dir)
def findURLs(string):
    """
    Collect every URL found in ``string``.

    Matching is case-insensitive. A URL may start with ``http://`` or
    ``https://``, with ``www.`` (optionally followed by up to three digits),
    or with a bare domain followed by a slash. Balanced parentheses inside
    the URL are kept; trailing punctuation is left out of the match.

    Returns a list of the matched URL strings in order of appearance.
    """
    pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    # Group 1 wraps the complete URL; the remaining groups only serve the
    # parenthesis-balancing sub-patterns and are not part of the result.
    return [match.group(1) for match in re.finditer(pattern, string)]
def find_imageURLS(string):
    """
    return all image URLS in a given string

    An image URL is an optional ``http:``/``https:`` scheme, then ``//``,
    then a run of URL characters ending in ``.png``, ``.jpg``, ``.jpeg``,
    ``.gif`` or ``.svg`` (case-insensitive). Anything after the extension,
    such as a query string, is not part of the returned URL.

    Returns a list of the matched URL strings in order of appearance; the
    list is empty when nothing matches.
    """
    # The URL body excludes whitespace, quotes and angle brackets so that a
    # match cannot start at one link and run through surrounding text or
    # HTML markup into a later image link. The trailing \b stops the lazy
    # match from ending inside a longer word (e.g. ".gif" inside ".gift").
    regex = r"(?:https?:)?//[^\s\"'<>]*?\.(?:png|jpg|jpeg|gif|svg)\b"
    return re.findall(regex, string, flags=re.IGNORECASE)
def create_metadata(event):
# construct a formatted dict of event metadata for use in a post
"""
construct a formatted dict of event metadata for use as front matter for a Hugo post
"""
if event.location:
location_urls = findURLs(event.location)
@ -57,9 +70,16 @@ def create_metadata(event):
'uid': event.uid
}
#TODO: 1 find image urls in description
# 2 download them to hugo post
# 3 replace image url with local path
return event_metadata
def localize_time(date):
"""
Turn a given date into various timezones
"""
# Dates need to be displayed for the various TZs
# takes arrow objects
@ -104,6 +124,9 @@ def create_event_post(post_dir, event):
def update_event_post(post_dir, event):
"""
Update a post based on the VCARD event's 'created' field, which changes when the event is updated
"""
if os.path.exists(post_dir):
old_timestamp = open(os.path.join(post_dir,'.timestamp')).read()
if event.created > arrow.get(old_timestamp):