rra
2 years ago
1 changed files with 62 additions and 0 deletions
@ -0,0 +1,62 @@ |
|||
import datetime
from time import sleep

import requests
|||
|
|||
|
|||
#def download_media(dir, url): |
|||
# remote_url |
|||
# description |
|||
|
|||
|
|||
# Page through the whole "looptober" tag timeline and collect every page of
# decoded JSON in `data` (one list entry per downloaded page).
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []

while there_is_more:
    print("downloading", url)
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    print(r.status_code)

    if not r.ok:
        # Stop on any HTTP error; pages collected so far are kept in `data`.
        break

    if r.content:
        data.append(r.json())
        print(len(data))
        sleep(1)  # pause between page requests to go easy on the server

    # The Link header can be present without a "next" relation (e.g. only
    # "prev"), so look it up explicitly instead of assuming it exists.
    next_link = r.links.get("next")
    if next_link:
        url = next_link["url"]
        print("found next url", url)
    else:
        print("no more data")
        there_is_more = False
|||
|
|||
# Walk every downloaded page and keep a few valuable fields of each status
# that has media attached and was created in 2022; results go in `looooops`.
looooops = []

for collection in data:
    for i in collection:
        # Only entries that actually contain a sound file are of interest.
        if not i["media_attachments"]:
            continue

        # Parse `created_at` as an aware UTC datetime. A trailing "Z" is
        # rewritten to an explicit offset because fromisoformat() rejects it
        # on older Python versions; merely stripping it would yield a naive
        # value that astimezone() then misreads as local time.
        timestamp = i["created_at"]
        if timestamp.endswith("Z"):
            timestamp = timestamp[:-1] + "+00:00"
        creation_date = datetime.datetime.fromisoformat(timestamp)
        if creation_date.tzinfo is None:
            # NOTE(review): timestamps without an offset are assumed to be UTC.
            creation_date = creation_date.replace(tzinfo=datetime.timezone.utc)
        creation_date = creation_date.astimezone(datetime.timezone.utc)

        # Only entries from this year (2022) are kept.
        if creation_date.year != 2022:
            continue

        stuff = {
            "url": i["url"],
            "description": i["content"],
            "audio": i["media_attachments"],
            "date": i["created_at"],
            "id": i["id"],
            "creator": i["account"]["username"],
        }
        looooops.append(stuff)
        print(
            f"found post by {i['account']['username']} "
            f"with {len(i['media_attachments'])} looops"
        )
|||
|
|||
|
|||
# TODO: for each entry in looooops:
#   - create a folder per entry, named after its id
#   - download the files in its media attachments using their remote_url
#   - find a way to embed the metadata in the downloaded files
Loading…
Reference in new issue