import datetime  # was missing: fromisoformat/timezone.utc below need it

import requests
from time import sleep

#def download_media(dir, url):
#    remote_url
#    description

# Page through the whole #looptober hashtag timeline on post.lurk.org and
# collect each page of statuses (one JSON list per page) into `data`.
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []
while there_is_more:
    print("downloading", url)
    r = requests.get(url)
    print(r.status_code)
    if r.ok:
        if r.content:
            data.append(r.json())
            print(len(data))
        sleep(1)  # be polite to the instance between page fetches
        # Mastodon paginates via the Link header, exposed by requests as
        # r.links. Test for the "next" relation specifically: on the last
        # page the header can still be present with only a "prev" link, and
        # indexing r.links["next"] unconditionally would raise KeyError.
        if "next" in r.links:
            url = r.links["next"]["url"]
            print("found next url", url)
        else:
            print("no more data")
            there_is_more = False
            break
    else:
        break

# Parse all the collected JSON, keeping a few valuable fields per post
# in `looooops`.
looooops = []
for collection in data:
    for i in collection:
        if i["media_attachments"]:
            # we only take entries that actually contain a sound file.
            # created_at is ISO-8601; [:-1] presumably strips a trailing
            # "Z" so pre-3.11 fromisoformat() accepts it — TODO confirm
            # the field always ends in "Z". Normalize to UTC before
            # comparing the year.
            creation_date = datetime.datetime.fromisoformat(
                i['created_at'][:-1]).astimezone(
                    datetime.timezone.utc)
            if creation_date.strftime('%Y') == "2022":
                # we only take entries from this year
                stuff = {}
                stuff["url"] = i["url"]
                stuff["description"] = i["content"]
                stuff["audio"] = i["media_attachments"]
                stuff["date"] = i["created_at"]
                stuff["id"] = i["id"]
                stuff["creator"] = i["account"]["username"]
                looooops.append(stuff)
                print("found post by {} with {} looops".format(
                    i["account"]["username"], len(i["media_attachments"])))

#for l in looooops:
#    create a folder per l, named id
#    download the files in media_attachments using the remote_url
#    find a way to stuff metadata in the file