URLs in the Location field are now parsed and truncated

This commit is contained in:
rra 2021-08-25 20:30:14 +02:00
parent 066464ede6
commit 76d5f25835

View File

@ -11,7 +11,9 @@ import shutil
from slugify import slugify
from natural import date
from event_feed_config import calendar_url, output_dir
from urllib.parse import urlparse
import arrow
import re
# Fetch calendar_url over HTTP and build a Calendar from the response text.
cal = Calendar(requests.get(calendar_url).text)
@ -26,8 +28,23 @@ template = env.get_template('event_template.md')
# Names of the entries currently present in output_dir.
existing_posts = os.listdir(output_dir)
def findURLs(string):
    """Return all URL-like substrings of ``string``, in order of appearance.

    A candidate starts with ``http://``/``https://``, a ``www.`` prefix, or a
    bare domain followed by a slash. The pattern tolerates balanced
    parentheses inside the URL and refuses to end a match on common
    punctuation, so a URL at the end of a sentence does not swallow the
    trailing period.
    """
    url_pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    # Group 1 wraps the complete URL; the inner groups only exist to handle
    # nested parentheses and are of no interest to callers.
    return [hit.group(1) for hit in re.finditer(url_pattern, string)]
def create_metadata(event):
#construct a formatted dict of event metadata for in a post
if event.location:
location_urls = findURLs(event.location)
if location_urls:
location_url = location_urls[0]
event.location = '[{}]({})'.format(urlparse(location_url).netloc, location_url)
event_metadata = {
'name':event.name,
'created':event.created.format(),