"""download_loooooops.py: collect #looptober posts that carry media.

The script pages through the ``looptober`` tag timeline on post.lurk.org,
keeps every page of JSON, and then reduces the posts to a short list of
dicts (``looooops``) holding only the fields needed for a later download.
"""
import datetime
from time import sleep


# TODO: def download_media(dir, url):
#   remote_url
#   description


TAG_TIMELINE_URL = "https://post.lurk.org/api/v1/timelines/tag/looptober"


def fetch_pages(url, delay=1, timeout=30):
    """Follow the ``next`` links starting at *url* and return all pages.

    Args:
        url: Address of the first page.
        delay: Seconds to wait between two requests.
        timeout: Seconds after which a single request is abandoned.

    Returns:
        A list with one decoded JSON document per page that had a body.
        Paging stops at the first non-OK response or when a response no
        longer advertises a ``next`` link.
    """
    # Imported here so the parsing helpers below can be imported (and
    # tested) on a machine that does not have ``requests`` installed.
    import requests

    pages = []
    while url:
        print("downloading", url)
        r = requests.get(url, timeout=timeout)
        print(r.status_code)
        if not r.ok:
            break

        if r.content:
            pages.append(r.json())
            print(len(pages))

        # ``r.links`` may hold other relations (e.g. only "prev") without a
        # "next" entry, so look the key up defensively instead of indexing.
        next_link = r.links.get("next")
        if not next_link:
            print("no more data")
            break

        url = next_link["url"]
        print("found next url", url)
        sleep(delay)  # pause only when another request actually follows
    return pages


def parse_created_at(stamp):
    """Turn an ISO-8601 ``created_at`` string into an aware UTC datetime.

    A trailing ``Z`` is rewritten to ``+00:00`` because older versions of
    ``datetime.fromisoformat`` reject it. A stamp that carries no offset at
    all is assumed to already be in UTC (not local time).
    """
    if stamp.endswith("Z"):
        stamp = stamp[:-1] + "+00:00"
    parsed = datetime.datetime.fromisoformat(stamp)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.astimezone(datetime.timezone.utc)


def collect_loops(data, year=2022):
    """Pick the posts from *data* that have media and were created in *year*.

    Args:
        data: List of pages, each page being a list of post dicts.
        year: Calendar year (UTC) a post must have been created in.

    Returns:
        A list of dicts with the keys ``url``, ``description``, ``audio``,
        ``date``, ``id`` and ``creator``.
    """
    looooops = []
    for collection in data:
        for i in collection:
            # Only posts with at least one media attachment are kept; the
            # attachment type itself is not inspected here.
            if not i["media_attachments"]:
                continue
            if parse_created_at(i["created_at"]).year != year:
                continue

            looooops.append({
                "url": i["url"],
                "description": i["content"],
                "audio": i["media_attachments"],
                "date": i["created_at"],
                "id": i["id"],
                "creator": i["account"]["username"],
            })
            print("found post by {} with {} looops".format(
                i["account"]["username"],
                len(i["media_attachments"])))
    return looooops


if __name__ == "__main__":
    # This pages through the whole looptober tag and collects the JSON in 'data'.
    data = fetch_pages(TAG_TIMELINE_URL)

    # This parses all the JSON, taking a few valuable fields into looooops.
    looooops = collect_loops(data)

    # TODO: for l in looooops:
    #   create a folder per l, named id
    #   download the files in media_attachments using the remote_url
    #   find a way to stuff metadata in the file