now filters out iframe sources not in allowlist, wip #4
This commit is contained in:
parent
17d7faac4d
commit
0554e93de2
@ -121,7 +121,7 @@ def grab_media(post_directory, url):
|
||||
return image
|
||||
|
||||
except Exception as e:
|
||||
print('Failed to download cover image', url)
|
||||
print('Failed to download image', url)
|
||||
print(e)
|
||||
return url
|
||||
|
||||
@ -129,24 +129,22 @@ def grab_media(post_directory, url):
|
||||
def parse_posts(post_dir, post_content):
    """
    Parse the post content for media items.

    Replace each foreign image with a local copy and filter out
    iframes whose source is not in the allowlist.

    post_dir: directory handed to grab_media for the local copy.
    post_content: HTML markup of the post.

    Returns the rewritten markup as a string.
    """
    from urllib.parse import urlparse

    allowed_iframe_sources = ('youtube.com', 'vimeo.com', 'tv.lumbung.space')

    def is_allowed(src):
        # Match on the parsed hostname instead of the raw string, so an
        # allowed domain that merely appears in a path or query string
        # (e.g. https://evil.example/?youtube.com) does not pass.
        try:
            host = (urlparse(src).hostname or '').lower()
        except ValueError:
            return False
        return any(
            host == source or host.endswith('.' + source)
            for source in allowed_iframe_sources
        )

    soup = BeautifulSoup(post_content, "html.parser")

    for img in soup(['img', 'object']):
        src = img.get('src')
        if not src:
            # Tags without a src (e.g. <object data=...>) have nothing to
            # fetch; indexing img['src'] here would raise KeyError.
            continue
        local_image = grab_media(post_dir, src)
        # grab_media hands back the original url when the download fails,
        # in which case the tag is left untouched.
        if src != local_image:
            print(src, '->', local_image)
            img['src'] = local_image

    for iframe in soup(['iframe']):
        # An iframe with no src at all is treated as not allowlisted.
        src = iframe.get('src') or ''
        if not is_allowed(src):
            print('filtered iframe: {}...'.format(src[:25]))
            iframe.decompose()

    return soup.decode()
|
||||
|
||||
def grab_feed(feed_url):
|
||||
|
Loading…
Reference in New Issue
Block a user