Browse Source

URLs in the Location field are now parsed and truncated

master
rra 3 years ago
parent
commit
76d5f25835
  1. 17
      event_feed.py

17
event_feed.py

@ -11,7 +11,9 @@ import shutil
from slugify import slugify from slugify import slugify
from natural import date from natural import date
from event_feed_config import calendar_url, output_dir from event_feed_config import calendar_url, output_dir
from urllib.parse import urlparse
import arrow import arrow
import re
cal = Calendar(requests.get(calendar_url).text) cal = Calendar(requests.get(calendar_url).text)
@ -26,8 +28,23 @@ template = env.get_template('event_template.md')
existing_posts = os.listdir(output_dir) existing_posts = os.listdir(output_dir)
# Pattern for URLs embedded in free text. Compiled once at import time rather
# than on every call. Group 1 (the outermost group) captures the whole URL;
# the inner groups exist only to allow balanced parentheses inside a URL.
_URL_PATTERN = re.compile(
    r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
)


def findURLs(string):
    """Return every URL found in *string*, in order of appearance.

    A URL is recognised when it starts with ``http://`` / ``https://``,
    with ``www.`` (optionally followed by up to three digits, e.g.
    ``www2.``), or with a bare ``domain.tld/`` prefix. Trailing punctuation
    such as ``.``, ``,`` or ``?`` is not included in the match.

    Args:
        string: Text to scan. A falsy value (``None`` or ``""``) yields an
            empty list instead of raising.

    Returns:
        A list of the matched URL strings; empty when nothing matches.
    """
    if not string:
        return []
    # group(1) is the full URL; findall() would instead hand back a tuple of
    # all capture groups per match, which then has to be indexed by hand.
    return [match.group(1) for match in _URL_PATTERN.finditer(string)]
def create_metadata(event): def create_metadata(event):
#construct a formatted dict of event metadata for in a post #construct a formatted dict of event metadata for in a post
if event.location:
location_urls = findURLs(event.location)
if location_urls:
location_url = location_urls[0]
event.location = '[{}]({})'.format(urlparse(location_url).netloc, location_url)
event_metadata = { event_metadata = {
'name':event.name, 'name':event.name,
'created':event.created.format(), 'created':event.created.format(),

Loading…
Cancel
Save