From 0554e93de28601f4a1e2cc4fd6f500aedb595086 Mon Sep 17 00:00:00 2001
From: rra <rscmbbng@riseup.net>
Date: Thu, 23 Sep 2021 21:29:25 +0200
Subject: [PATCH] now filters out iframe sources not in allowlist, wip #4

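---
Review notes (text after the three-dash line is dropped by `git am`):

* parse_posts: `source in iframe['src']` is a substring test over the whole
  URL, so an address that merely mentions an allowlisted name in its path or
  query string (e.g. https://example.org/?youtube.com) still passes the
  filter. Comparing the hostname from urllib.parse.urlsplit() against the
  allowlist would close this.
* parse_posts: `iframe['src']` and `img['src']` raise KeyError for a tag that
  has no src attribute (an <object> usually carries `data` instead);
  Tag.get('src') avoids the crash.
* parse_posts: `media = []` is never used inside the function, and the
  docstring still reads "to for media items".
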
 rss_aggregator.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/rss_aggregator.py b/rss_aggregator.py
index 0e49e30..9ef40e1 100644
--- a/rss_aggregator.py
+++ b/rss_aggregator.py
@@ -121,7 +121,7 @@ def grab_media(post_directory, url):
             return image
 
     except Exception as e:
-        print('Failed to download cover image', url)
+        print('Failed to download image', url)
         print(e)
     return url
 
@@ -129,24 +129,22 @@ def grab_media(post_directory, url):
 def parse_posts(post_dir, post_content):
     """
     parse the post content to for media items
-    replace foreign media item with local copy
+    replace foreign image with local copy
+    filter out iframe sources not in allowlist
     """
     soup = BeautifulSoup(post_content, "html.parser")
-    video_sources = ['youtube.com', 'vimeo.com']
+    allowed_iframe_sources = ['youtube.com', 'vimeo.com', 'tv.lumbung.space']
     media = []
 
     for img in soup(['img','object']):
         local_image = grab_media(post_dir, img['src'])
         if img['src'] != local_image:
-            print(img['src'], '->', local_image)
             img['src'] = local_image
 
     for iframe in soup(['iframe']):
-        #TODO figure out how to throw out blocklisted iframes while comparing
-        if video_sources[0] or video_sources[1] not in iframe['src']:
-            print(iframe)
-            #iframe.decompose()
-
+        if not any(source in iframe['src'] for source in allowed_iframe_sources):
+            print('filtered iframe: {}...'.format(iframe['src'][:25]))
+            iframe.decompose()
     return soup.decode()
 
 def grab_feed(feed_url):