initial commit, grabs all possible loops

2024-12-22 04:30:31 +01:00 · 2022-10-05 13:55:50 +02:00 · 2022-10-05 13:55:50 +02:00 · 4dfeed8311
commit 4dfeed8311
parent 8dfa491254
1 changed files with 62 additions and 0 deletions
--- a/download_loooooops.py
+++ b/download_loooooops.py
@@ -0,0 +1,62 @@
+import datetime
+from time import sleep
+
+import requests
+#def download_media(dir, url):
+#	remote_url
+#	description
+
+
+# Page through the whole looptober tag timeline, collecting each page's JSON in 'data'.
+there_is_more = True
+url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
+data = []
+while there_is_more:
+	print("downloading", url)
+	r = requests.get(url, timeout=30)  # don't hang forever on a stalled connection
+	print(r.status_code)
+	if not r.ok:
+		break  # give up on HTTP errors, keeping the pages collected so far
+	if r.content:
+		data.append(r.json())
+		print(len(data))
+
+	# The Link header can hold only a "prev" entry (expected on the last page), so
+	# "next" is looked up defensively. An empty body also ends the paging, since
+	# retrying the same url would loop forever.
+	next_link = r.links.get("next") if r.content else None
+	if next_link is None:
+		print("no more data")
+		there_is_more = False
+		break
+	url = next_link["url"]
+	print("found next url", url)
+	sleep(1)  # be polite to the server between requests
+
+# Parse the collected JSON, keeping a few valuable fields per post in looooops.
+looooops = []
+for collection in data:
+	for post in collection:
+		if not post["media_attachments"]:  # only keep posts with attachments (presumably the loops)
+			continue
+		# Spell out the UTC offset: a bare "Z" is rejected before Python 3.11, and dropping it gives local-naive time.
+		creation_date = datetime.datetime.fromisoformat(
+			post["created_at"].replace("Z", "+00:00")).astimezone(datetime.timezone.utc)
+		if creation_date.year != 2022:  # we only take entries from this year
+			continue
+		looooops.append({
+			"url": post["url"],
+			"description": post["content"],
+			"audio": post["media_attachments"],
+			"date": post["created_at"],
+			"id": post["id"],
+			"creator": post["account"]["username"],
+		})
+		print("found post by {} with {} looops".format(
+			post["account"]["username"], len(post["media_attachments"])))
+
+
+#for l in looooops:
+	# create a folder per l, named id
+	# download the files in media_attachments using the remote_url
+	# find a way to stuff metadata in the file