now it actually downloads loops

2 years ago · 9a10f4f97a
1 changed files with 81 additions and 47 deletions
--- a/download_loooooops.py
+++ b/download_loooooops.py
@ -1,11 +1,32 @@
 import requests
 from time import sleep
-
+import datetime
 import os
 from urllib.parse import urlparse
 import shutil
 #def download_media(dir, url):
 #   remote_url
 #   description
 output_dir = "/home/r/Programming/radio-looptober/loops"
 def grab_media(path, url, filename=None):
    """Download the media file behind `url` into the directory `path`.

    The local file name is the last segment of the URL path. If a file
    with that name already exists in `path`, nothing is downloaded.

    Args:
        path: Existing directory the file is written to.
        url: Address of the media file.
        filename: Not used; optional so that both two-argument and
            three-argument calls work.

    Returns:
        The local file name when a download happened, otherwise None
        (file already present, or the server answered with an error).
    """
    media_item = urlparse(url).path.split('/')[-1]
    destination = os.path.join(path, media_item)
    # Skip files that are already on disk.
    if os.path.exists(destination):
        return None
    headers = {
    'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
    'From': 'post.lurk.org/@lurk'  # This is another valid field
    }
    # The context manager releases the streamed connection on every path,
    # including the early return for error responses.
    with requests.get(url, headers=headers, stream=True) as response:
        if not response.ok:
            print('Could not download media {} from {} (status {})'.format(
                media_item, urlparse(url).netloc, response.status_code))
            return None
        # response.raw bypasses requests' content decoding; enable it so a
        # gzip/deflate-encoded body is written decoded, not compressed.
        response.raw.decode_content = True
        with open(destination, 'wb') as media_file:
            shutil.copyfileobj(response.raw, media_file)
    print('Downloaded media {} from {}'.format(media_item, urlparse(url).netloc))
    return media_item
 #This pages through every page of the looptober tag and collects the JSON in 'data'
 there_is_more = True
@ -14,13 +35,13 @@ data = []
 while there_is_more:
    print("downloading", url)
    r = requests.get(url)
-	print(r.status_code)
+    print("response status: ", r.status_code)
    if r.ok:
        if r.content:
            data.append(r.json())
-			print(len(data))
+            print("amount of pages:", len(data))
-			sleep(1)
+            sleep(0.5)
            if r.links:
                url = r.links["next"]["url"]
@ -55,8 +76,21 @@ for collection in data:
                    i["account"]["username"],
                    len(i["media_attachments"])))
 # Make sure the download root exists; exist_ok makes this a no-op when it
 # is already there and avoids the check-then-create race.
 os.makedirs(output_dir, exist_ok=True)
 for l in looooops:
    # One folder per entry, named "<creator>_<id>".
    path = os.path.join(output_dir,"{}_{}".format(l['creator'], l['id']))
    os.makedirs(path, exist_ok=True)
    print("\n")
    print("Downloading looops by ***{}***".format(l['creator']))
    for a in l['audio']:
        # Use remote_url when it is set, otherwise fall back to url.
        url = a['remote_url'] or a['url']
        grab_media(path, url)
 #for l in looooops:
 	# create a folder per l, named id
 	# download the files in media_attachments using the remote_url
 	# find a way to stuff metadata in the file