wip of #4
This commit is contained in:
parent
fbb22d64fc
commit
611bea8f24
@ -95,10 +95,10 @@ def create_post(post_dir, entry):
|
|||||||
|
|
||||||
parsed_content = parse_posts(post_dir, post_content)
|
parsed_content = parse_posts(post_dir, post_content)
|
||||||
|
|
||||||
with open(os.path.join(post_dir,'index.html'),'w') as f:
|
with open(os.path.join(post_dir,'index.html'),'w') as f: #n.b. .html
|
||||||
post = template.render(frontmatter=frontmatter, content=parsed_content)
|
post = template.render(frontmatter=frontmatter, content=parsed_content)
|
||||||
f.write(post)
|
f.write(post)
|
||||||
print('created post for', entry.title, '({})'.format(entry.link))
|
#print('created post for', entry.title, '({})'.format(entry.link))
|
||||||
|
|
||||||
def grab_media(post_directory, url):
|
def grab_media(post_directory, url):
|
||||||
"""
|
"""
|
||||||
@ -124,18 +124,27 @@ def grab_media(post_directory, url):
|
|||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
def parse_posts(post_dir, post_content):
    """
    Parse the post content for media items and replace each foreign
    (remote) media item with a local copy.

    :param post_dir: directory of the post; passed to grab_media()
        as the download target for remote media
    :param post_content: HTML string of the post body
    :returns: the rewritten HTML as a string
    """
    soup = BeautifulSoup(post_content, "html.parser")

    # iframe embeds from these hosts are considered acceptable
    video_sources = ['youtube.com', 'vimeo.com']

    # Download each remote image/object and point the tag at the
    # local copy instead.
    for img in soup(['img', 'object']):
        local_image = grab_media(post_dir, img['src'])
        if img['src'] != local_image:
            print(img['src'], '->', local_image)
            img['src'] = local_image

    for iframe in soup(['iframe']):
        #TODO figure out how to throw out blocklisted iframes while comparing
        # BUG FIX: the original test
        #   `video_sources[0] or video_sources[1] not in iframe['src']`
        # was always truthy (a non-empty string is True), so every
        # iframe was flagged. Check each allowed source properly.
        if not any(source in iframe['src'] for source in video_sources):
            print(iframe)
            #iframe.decompose()

    return soup.decode()
|
|
||||||
def grab_feed(feed_url):
|
def grab_feed(feed_url):
|
||||||
@ -188,6 +197,9 @@ if not os.path.exists(output_dir):
|
|||||||
|
|
||||||
# Render posts through the shared markdown post template.
template = env.get_template('post_template.md')

#add iframe to the allowlist of feedparser's sanitizer,
#this is now handled in parse_post()
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {'iframe'}
|
|
||||||
for feed_url in feed_urls:
|
for feed_url in feed_urls:
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user