You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

63 lines
1.6 KiB

import datetime
from time import sleep

import requests
#def download_media(dir, url):
# remote_url
# description
# Page through the whole looptober tag timeline and collect the JSON of every
# page in 'data' (one list entry per downloaded page).
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []
while there_is_more:
    print("downloading", url)
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, timeout=30)
    print(r.status_code)
    if not r.ok:
        # Stop on an HTTP error; the pages collected so far stay in 'data'.
        print("request failed, stopping")
        break
    if r.content:
        data.append(r.json())
        print(len(data))
    # r.links is the parsed Link header keyed by relation. It can be non-empty
    # without a "next" entry (presumably the case on the last page, where only
    # "prev" is advertised), so look the relation up defensively instead of
    # indexing it.
    next_link = r.links.get("next")
    if next_link:
        url = next_link["url"]
        print("found next url", url)
        sleep(1)  # pause between requests to go easy on the server
    else:
        print("no more data")
        there_is_more = False
# Parse all the collected JSON, take a few valuable fields from every matching
# post and put them in 'looooops'.
LOOPTOBER_YEAR = 2022  # we only take entries from this year
looooops = []
for collection in data:
    for status in collection:
        attachments = status["media_attachments"]
        if not attachments:
            # We only take entries that carry at least one media attachment
            # (expected to be the loop itself; the media type is not checked).
            continue
        # 'created_at' is expected to be a UTC timestamp ending in "Z".
        # Swapping the suffix for an explicit offset makes fromisoformat()
        # return an aware datetime. Merely stripping it produced a naive value
        # that astimezone() interpreted as local time, shifting the result by
        # the machine's UTC offset.
        created_at = status["created_at"]
        if created_at.endswith("Z"):
            created_at = created_at[:-1] + "+00:00"
        creation_date = datetime.datetime.fromisoformat(
            created_at).astimezone(datetime.timezone.utc)
        if creation_date.year != LOOPTOBER_YEAR:
            continue
        looooops.append({
            "url": status["url"],
            "description": status["content"],
            "audio": attachments,
            "date": status["created_at"],
            "id": status["id"],
            "creator": status["account"]["username"],
        })
        print("found post by {} with {} looops".format(
            status["account"]["username"],
            len(attachments)))
#for l in looooops:
# create a folder per l, named id
# download the files in media_attachments using the remote_url
# find a way to stuff metadata in the file