now it actually downloads loops

2 years ago · 9a10f4f97a
1 changed files with 81 additions and 47 deletions
--- a/download_loooooops.py
+++ b/download_loooooops.py
@ -1,11 +1,32 @@
 import requests
 from time import sleep
-
+import datetime
+import os
+from urllib.parse import urlparse
+import shutil

 #def download_media(dir, url):
 #   remote_url
 #   description

+output_dir = "/home/r/Programming/radio-looptober/loops"
+
+def grab_media(path, url, filename=None):
+    """Download the media file at ``url`` into the directory ``path``.
+
+    path:     existing directory the file is written into.
+    url:      address of the media file.
+    filename: optional name for the local file; when omitted (or empty) the
+              last segment of the URL path is used.
+
+    Returns the local file name when a download happened. Returns None when
+    the file was already present or the server answered with an error status.
+    """
+    media_item = filename or urlparse(url).path.split('/')[-1]
+    target = os.path.join(path, media_item)
+
+    headers = {
+        'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
+        'From': 'post.lurk.org/@lurk'  # This is another valid field
+    }
+
+    # Already fetched on an earlier run: skip the request entirely.
+    if os.path.exists(target):
+        return None
+
+    # Download to a temporary name first, so an interrupted transfer never
+    # leaves a truncated file that the existence check above would accept.
+    partial = target + '.part'
+
+    # The context manager releases the streamed connection; the timeout keeps
+    # one unresponsive server from hanging the whole run.
+    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
+        if not response.ok:
+            return None
+        # response.raw bypasses requests' own decoding; ask urllib3 to undo
+        # any gzip/deflate Content-Encoding before the bytes hit the disk.
+        response.raw.decode_content = True
+        with open(partial, 'wb') as media_file:
+            shutil.copyfileobj(response.raw, media_file)
+
+    os.replace(partial, target)
+    print('Downloaded media {} from {}'.format(media_item, urlparse(url).netloc))
+    return media_item

 # This walks through every page of results for the looptober tag and collects each page's JSON in 'data'
 there_is_more = True
@ -14,13 +35,13 @@ data = []
 while there_is_more:
    print("downloading", url)
    r = requests.get(url)
-	print(r.status_code)
+    print("response status: ", r.status_code)
    if r.ok:
        if r.content:

            data.append(r.json())
-			print(len(data))
-			sleep(1)
+            print("amount of pages:", len(data))
+            sleep(0.5)

            if r.links:
                url = r.links["next"]["url"]
@ -55,8 +76,21 @@ for collection in data:
                    i["account"]["username"],
                    len(i["media_attachments"])))

+# Make sure the download root exists. exist_ok avoids the check-then-create
+# race, and makedirs also creates missing parent directories.
+os.makedirs(output_dir, exist_ok=True)
+
+for loop in looooops:
+    # One folder per entry, named "<creator>_<id>".
+    path = os.path.join(output_dir, "{}_{}".format(loop['creator'], loop['id']))
+    os.makedirs(path, exist_ok=True)
+
+    print("\n")
+    print("Downloading looops by ***{}***".format(loop['creator']))
+    for attachment in loop['audio']:
+        # Use remote_url when it is set, otherwise fall back to url.
+        url = attachment['remote_url'] or attachment['url']
+
+        # grab_media takes the local file name as its third argument;
+        # pass the last segment of the URL path.
+        grab_media(path, url, urlparse(url).path.split('/')[-1])

-#for l in looooops:
-	# create a folder per l, named id
-	# download the files in media_attachments using the remote_url
-	# find a way to stuff metadata in the file