radio-looptober-lurk/download_loooooops.py

import datetime
from time import sleep

import requests


#def download_media(dir, url):
#	remote_url
#	description


# Page through the whole "looptober" tag timeline and collect the decoded
# JSON of every page in 'data' (one list entry per downloaded page).
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []
while there_is_more:
	print("downloading", url)
	# A timeout keeps the script from hanging forever on a stalled connection.
	r = requests.get(url, timeout=30)
	print(r.status_code)
	if not r.ok:
		# HTTP error: give up and keep whatever pages were already collected.
		break
	if not r.content:
		# Empty body: nothing to decode and no way to advance, so stop here
		# instead of requesting the same url again and again.
		print("no more data")
		there_is_more = False
		break

	data.append(r.json())
	print(len(data))

	# requests exposes the parsed Link header as r.links. It may hold only a
	# "prev" entry, so look up "next" without assuming it exists.
	next_link = r.links.get("next")
	if next_link:
		url = next_link["url"]
		print("found next url", url)
		# Pause between requests only when another request will follow.
		sleep(1)
	else:
		print("no more data")
		there_is_more = False

# Parse all the collected JSON, keeping a few valuable fields of every
# matching post in 'looooops'.
looooops = []
for collection in data:
	for i in collection:
		# Only keep entries that carry at least one media attachment.
		# NOTE(review): the attachment type is not checked; presumably these
		# are the sound files - confirm.
		if not i["media_attachments"]:
			continue

		# 'created_at' is sliced/parsed as an ISO 8601 string; a trailing "Z"
		# is rewritten to an explicit UTC offset so the parsed value is
		# timezone-aware rather than being interpreted as local time.
		created = i["created_at"]
		if created.endswith("Z"):
			created = created[:-1] + "+00:00"
		creation_date = datetime.datetime.fromisoformat(created)
		if creation_date.tzinfo is None:
			# No offset given at all: treat the timestamp as UTC.
			creation_date = creation_date.replace(
				tzinfo=datetime.timezone.utc)
		creation_date = creation_date.astimezone(datetime.timezone.utc)

		# Only keep entries from this year.
		if creation_date.year != 2022:
			continue

		stuff = {
			"url": i["url"],
			"description": i["content"],
			"audio": i["media_attachments"],
			"date": i["created_at"],
			"id": i["id"],
			"creator": i["account"]["username"],
		}
		looooops.append(stuff)
		print("found post by {} with {} looops".format(
			i["account"]["username"],
			len(i["media_attachments"])))


#for l in looooops:
	# create a folder per l, named id
	# download the files in media_attachments using the remote_url
	# find a way to stuff metadata in the file
initial commit, grabs all possible loops 2 years ago			`import requests`
			`from time import sleep`


			`#def download_media(dir, url):`
			`# remote_url`
			`# description`


			`#This pages through all the looptober tag and collects the json in 'data'`
			`there_is_more = True`
			`url = "https://post.lurk.org/api/v1/timelines/tag/looptober"`
			`data = []`
			`while there_is_more:`
			`print("downloading", url)`
			`r = requests.get(url)`
			`print(r.status_code)`
			`if r.ok:`
			`if r.content:`

			`data.append(r.json())`
			`print(len(data))`
			`sleep(1)`

			`if r.links:`
			`url = r.links["next"]["url"]`
			`print("found next url", url)`

			`else:`
			`print("no more data")`
			`there_is_more = False`
			`break`
			`else:`
			`break`

			`#this parses all the json, taking a few valuable fields and puts them in looooops`
			`looooops = []`
			`for collection in data:`
			`for i in collection:`
			`if i["media_attachments"]: #we only take entries that actually contain a sound file`
			`creation_date = datetime.datetime.fromisoformat(`
			`i['created_at'][:-1]).astimezone(`
			`datetime.timezone.utc)`

			`if creation_date.strftime('%Y') == "2022": #we only take entries from this year`
			`stuff = {}`
			`stuff["url"] = i["url"]`
			`stuff["description"] = i["content"]`
			`stuff["audio"] = i["media_attachments"]`
			`stuff["date"] = i["created_at"]`
			`stuff["id"] = i["id"]`
			`stuff["creator"] = i["account"]["username"]`
			`looooops.append(stuff)`
			`print("found post by {} with {} looops".format(`
			`i["account"]["username"],`
			`len(i["media_attachments"])))`


			`#for l in looooops:`
			`# create a folder per l, named id`
			`# download the files in media_attachments using the remote_url`
			`# find a way to stuff metadata in the file`