
now it actually downloads loops

main
commit 9a10f4f97a by rra, 2 years ago
download_loooooops.py (128 changed lines)
@@ -1,62 +1,96 @@
 import requests
 from time import sleep
+import datetime
+import os
+from urllib.parse import urlparse
+import shutil

 #def download_media(dir, url):
 # remote_url
 # description
+output_dir = "/home/r/Programming/radio-looptober/loops"
+
+def grab_media(path, url):
+    media_item = urlparse(url).path.split('/')[-1]
+    headers = {
+        'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
+        'From': 'post.lurk.org/@lurk'  # This is another valid field
+    }
+    if not os.path.exists(os.path.join(path, media_item)):
+        response = requests.get(url, headers=headers, stream=True)
+        if response.ok:
+            with open(os.path.join(path, media_item), 'wb') as media_file:
+                shutil.copyfileobj(response.raw, media_file)
+            print('Downloaded media {} from {}'.format(media_item, urlparse(url).netloc))
+    return media_item
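
A side note on the streaming download above: shutil.copyfileobj(response.raw, media_file) copies the raw socket stream, which bypasses requests' transparent decompression, so if a server ever serves the audio with gzip Content-Encoding the saved file would hold the compressed bytes. A minimal alternative (a sketch, not part of this commit) that lets requests handle the decoding:

    # Sketch: same effect as shutil.copyfileobj(response.raw, ...),
    # but iter_content() decodes gzip/deflate Content-Encoding for us.
    with open(os.path.join(path, media_item), 'wb') as media_file:
        for chunk in response.iter_content(chunk_size=8192):
            media_file.write(chunk)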
 #This pages through all the looptober tag and collects the json in 'data'
 there_is_more = True
 url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
 data = []

 while there_is_more:
     print("downloading", url)
     r = requests.get(url)
-    print(r.status_code)
+    print("response status: ", r.status_code)
     if r.ok:
         if r.content:
             data.append(r.json())
-            print(len(data))
-            sleep(1)
+            print("amount of pages:", len(data))
+            sleep(0.5)
         if r.links:
             url = r.links["next"]["url"]
             print("found next url", url)
         else:
             print("no more data")
             there_is_more = False
             break
     else:
         break
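
The paging here relies on requests parsing the HTTP Link header into r.links; Mastodon paginates timelines with rel="next" and rel="prev" entries. One hazard: on the last page the server can still send a Link header that only contains rel="prev", in which case r.links is truthy but r.links["next"] raises KeyError. A more defensive check (a sketch, not in this commit) would be:

    # Sketch: guard against a Link header with no rel="next" entry,
    # which r.links["next"] would otherwise turn into a KeyError.
    if "next" in r.links:
        url = r.links["next"]["url"]
    else:
        print("no more data")
        there_is_more = False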
 #this parses all the json, taking a few valuable fields and puts them in looooops
 looooops = []
 for collection in data:
     for i in collection:
         if i["media_attachments"]: #we only take entries that actually contain a sound file
             creation_date = datetime.datetime.fromisoformat(
                 i['created_at'][:-1]).astimezone(
                     datetime.timezone.utc)

             if creation_date.strftime('%Y') == "2022": #we only take entries from this year
                 stuff = {}
                 stuff["url"] = i["url"]
                 stuff["description"] = i["content"]
                 stuff["audio"] = i["media_attachments"]
                 stuff["date"] = i["created_at"]
                 stuff["id"] = i["id"]
                 stuff["creator"] = i["account"]["username"]
                 looooops.append(stuff)
                 print("found post by {} with {} looops".format(
                     i["account"]["username"],
                     len(i["media_attachments"])))
-#for l in looooops:
-#     create a folder per l, named id
-#     download the files in media_attachments using the remote_url
-#     find a way to stuff metadata in the file
+if not os.path.exists(output_dir):
+    os.mkdir(output_dir)
+
+for l in looooops:
+    path = os.path.join(output_dir, "{}_{}".format(l['creator'], l['id']))
+    if not os.path.exists(path):
+        os.mkdir(path)
+
+    print("\n")
+    print("Downloading looops by ***{}***".format(l['creator']))
+    for a in l['audio']:
+        if a['remote_url']:
+            url = a['remote_url']
+        else:
+            url = a['url']
+        grab_media(path, url)
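
The old TODO about stuffing metadata into the file is dropped by this commit without being implemented. One low-tech option would be a sidecar text file per post, written inside the loop above (a sketch; "description.txt" is a hypothetical filename, not part of this commit):

    # Sketch: persist the post's HTML description next to the audio files.
    with open(os.path.join(path, "description.txt"), 'w') as f:
        f.write(l['description'])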
