|
|
@ -1,11 +1,32 @@ |
|
|
|
import requests |
|
|
|
from time import sleep |
|
|
|
|
|
|
|
import datetime |
|
|
|
import os |
|
|
|
from urllib.parse import urlparse |
|
|
|
import shutil |
|
|
|
|
|
|
|
#def download_media(dir, url): |
|
|
|
# remote_url |
|
|
|
# description |
|
|
|
|
|
|
|
# Root directory for downloads; one sub-folder per loop is created beneath it.
output_dir = "/home/r/Programming/radio-looptober/loops" |
|
|
|
|
|
|
|
def grab_media(path, url, filename=None):
    """Download the media file at *url* into the directory *path*.

    A file that already exists locally is not downloaded again.

    Args:
        path: Existing directory to save the file in.
        url: Address of the media file.
        filename: Optional local file name. Defaults to the last segment
            of the path component of *url*.

    Returns:
        The local file name (relative to *path*). It is returned even when
        the server answered with an error status and nothing was written.
    """
    media_item = filename or urlparse(url).path.split('/')[-1]
    target = os.path.join(path, media_item)

    # Already on disk: nothing to fetch.
    if os.path.exists(target):
        return media_item

    headers = {
        'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
        'From': 'post.lurk.org/@lurk'  # This is another valid field
    }

    # The context manager releases the streamed connection in every case.
    with requests.get(url, headers=headers, stream=True) as response:
        if response.ok:
            # Ask urllib3 to undo gzip/deflate content encoding; otherwise
            # the raw stream would be written to disk still compressed.
            response.raw.decode_content = True

            # Write under a temporary name and rename once complete, so an
            # interrupted transfer never leaves a truncated file that the
            # exists-check above would mistake for a finished download.
            partial = target + '.part'
            with open(partial, 'wb') as media_file:
                shutil.copyfileobj(response.raw, media_file)
            os.replace(partial, target)

            print('Downloaded media {} from {}'.format(media_item, urlparse(url).netloc))

    return media_item
|
|
|
|
|
|
|
# This pages through all posts under the looptober tag and collects the JSON in 'data' |
|
|
|
there_is_more = True |
|
|
@ -14,13 +35,13 @@ data = [] |
|
|
|
while there_is_more: |
|
|
|
print("downloading", url) |
|
|
|
r = requests.get(url) |
|
|
|
print(r.status_code) |
|
|
|
print("response status: ", r.status_code) |
|
|
|
if r.ok: |
|
|
|
if r.content: |
|
|
|
|
|
|
|
data.append(r.json()) |
|
|
|
print(len(data)) |
|
|
|
sleep(1) |
|
|
|
print("amount of pages:", len(data)) |
|
|
|
sleep(0.5) |
|
|
|
|
|
|
|
if r.links: |
|
|
|
url = r.links["next"]["url"] |
|
|
@ -55,8 +76,21 @@ for collection in data: |
|
|
|
i["account"]["username"], |
|
|
|
len(i["media_attachments"]))) |
|
|
|
|
|
|
|
# Make sure the download root exists. exist_ok avoids the check-then-create
# race, and makedirs also creates any missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for loop in looooops:
    # One folder per loop, named "<creator>_<id>".
    path = os.path.join(output_dir, "{}_{}".format(loop['creator'], loop['id']))
    os.makedirs(path, exist_ok=True)

    print("\n")
    print("Downloading looops by ***{}***".format(loop['creator']))
    for attachment in loop['audio']:
        # Use 'remote_url' when it is set (non-empty), otherwise 'url'.
        url = attachment['remote_url'] or attachment['url']

        grab_media(path, url)
|
|
|
|
|
|
|
#for l in looooops: |
|
|
|
# create a folder per l, named id |
|
|
|
# download the files in media_attachments using the remote_url |
|
|
|
# find a way to stuff metadata in the file |
|
|
|