now filters out iframe sources not in allowlist, wip #4
This commit is contained in:
parent
17d7faac4d
commit
0554e93de2
@ -121,7 +121,7 @@ def grab_media(post_directory, url):
|
|||||||
return image
|
return image
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('Failed to download cover image', url)
|
print('Failed to download image', url)
|
||||||
print(e)
|
print(e)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
@ -129,24 +129,22 @@ def grab_media(post_directory, url):
|
|||||||
def parse_posts(post_dir, post_content):
|
def parse_posts(post_dir, post_content):
|
||||||
"""
|
"""
|
||||||
parse the post content for media items
|
parse the post content for media items
|
||||||
replace foreign media item with local copy
|
replace foreign image with local copy
|
||||||
|
filter out iframe sources not in allowlist
|
||||||
"""
|
"""
|
||||||
soup = BeautifulSoup(post_content, "html.parser")
|
soup = BeautifulSoup(post_content, "html.parser")
|
||||||
video_sources = ['youtube.com', 'vimeo.com']
|
allowed_iframe_sources = ['youtube.com', 'vimeo.com', 'tv.lumbung.space']
|
||||||
media = []
|
media = []
|
||||||
|
|
||||||
for img in soup(['img','object']):
|
for img in soup(['img','object']):
|
||||||
local_image = grab_media(post_dir, img['src'])
|
local_image = grab_media(post_dir, img['src'])
|
||||||
if img['src'] != local_image:
|
if img['src'] != local_image:
|
||||||
print(img['src'], '->', local_image)
|
|
||||||
img['src'] = local_image
|
img['src'] = local_image
|
||||||
|
|
||||||
for iframe in soup(['iframe']):
|
for iframe in soup(['iframe']):
|
||||||
#TODO figure out how to throw out blocklisted iframes while comparing
|
if not any(source in iframe['src'] for source in allowed_iframe_sources):
|
||||||
if video_sources[0] or video_sources[1] not in iframe['src']:
|
print('filtered iframe: {}...'.format(iframe['src'][:25]))
|
||||||
print(iframe)
|
iframe.decompose()
|
||||||
#iframe.decompose()
|
|
||||||
|
|
||||||
return soup.decode()
|
return soup.decode()
|
||||||
|
|
||||||
def grab_feed(feed_url):
|
def grab_feed(feed_url):
|
||||||
|
Loading…
Reference in New Issue
Block a user