#!/usr/bin/env python3
import requests
from time import sleep
import datetime, shutil, os
from urllib.parse import urlparse
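
# Which year's #looptober posts we collect, where they go, and the
# Opus bitrate used when transcoding.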
year = "2023"
output_dir = os.path.join("loooooops", year)
bitrate = "128k"
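
# Transcode a downloaded attachment to loudness-normalised stereo Opus.
# We fork() and exec ffmpeg directly; the parent wait()s for the child,
# and 'metadata' is the per-post record assembled in 'looooops' below.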
def transcode_media(path, media_item, metadata):
    infile = os.path.join(path, media_item)
    outfile = os.path.join(path, media_item + ".opus")
    if not os.path.exists(outfile):
        print("transcoding to {}".format(outfile))
        pid = os.fork()
        if pid == 0:
            artist = metadata["creator"]
            title = metadata["url"]
            comment = metadata["description"]  # collected but not yet written to the tags
            date = metadata["date"]
            os.execlp("ffmpeg", "ffmpeg", "-hide_banner", "-loglevel", "error",
                      "-i", infile, "-map_metadata", "-1",
                      "-metadata", "artist={}".format(artist),
                      "-metadata", "title={}".format(title),
                      "-metadata", "creation_time={}".format(date),
                      "-map_chapters", "-1", "-ac", "2",
                      "-af", "loudnorm=dual_mono=true",
                      "-b:a", bitrate, "-y", outfile)
            # never reached, execlp replaces the child process image
        else:
            os.wait()
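
# Fetch a single media attachment into 'path'. Files that already exist
# on disk are skipped. Returns the local filename on success and None
# otherwise (connection errors are printed and waited out).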
def grab_media(path, url):
    try:
        media_item = urlparse(url).path.split('/')[-1]
        headers = {
            'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
            'From': 'post.lurk.org/@lurk'  # This is another valid field
        }
        if os.path.exists(os.path.join(path, media_item)):
            return media_item
        else:
            response = requests.get(url, headers=headers, stream=True)
            if response.ok:
                with open(os.path.join(path, media_item), 'wb') as media_file:
                    shutil.copyfileobj(response.raw, media_file)
                print('Downloaded media {} from {}'.format(
                    media_item, urlparse(url).netloc))
                return media_item
    except requests.exceptions.ConnectionError as e:
        # maybe transient network issues
        print(e)
        sleep(60)

# This pages through the whole #looptober tag timeline and collects
# the JSON in 'data'.
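# Mastodon paginates timeline requests via Link headers, which requests
# exposes as r.links.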
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []

while there_is_more:
    print("downloading", url)
    r = requests.get(url)
    print("response status: ", r.status_code)
    if r.ok:
        if r.content:
            data.append(r.json())
            print("amount of pages:", len(data))
            sleep(0.5)
        if "next" in r.links:  # the final page may carry only a 'prev' link
            url = r.links["next"]["url"]
            print("found next url", url)
        else:
            print("no more data")
            there_is_more = False
            break
    else:
        break

# This parses all the json, takes a few valuable fields and puts them
# in 'looooops'.
looooops = []
for collection in data:
    for i in collection:
        if i["media_attachments"]:  # we only take entries that actually contain a sound file
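            # 'created_at' is ISO 8601 with a trailing 'Z';
            # datetime.fromisoformat() only accepts that suffix from
            # Python 3.11 on, hence the [:-1] strip below.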
            creation_date = datetime.datetime.fromisoformat(
                i['created_at'][:-1]).astimezone(
                    datetime.timezone.utc)
            if creation_date.strftime('%Y') == year:  # we only take entries from this year
                stuff = {}
                stuff["url"] = i["url"]
                stuff["description"] = i["content"]
                stuff["audio"] = i["media_attachments"]
                stuff["date"] = i["created_at"]
                stuff["id"] = i["id"]
                stuff["creator"] = i["account"]["username"]
                looooops.append(stuff)
                print("found post by {} with {} looops".format(
                    i["account"]["username"],
                    len(i["media_attachments"])))
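
# Download every attachment of every loop into its own
# <creator>_<id> directory, then transcode whatever we got.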
for l in looooops:
    path = os.path.join(output_dir, "{}_{}".format(l['creator'], l['id']))
    os.makedirs(path, exist_ok=True)
    print("\n")
    print("Downloading looops by ***{}***".format(l['creator']))
    for a in l['audio']:
        if a['remote_url']:
            url = a['remote_url']
        else:
            url = a['url']
        media_item = grab_media(path, url)
        if media_item:
            transcode_media(path, media_item, l)

# Once we've done everything we generate a playlist and ask ezstream
# to reload it.
# This is not an injection vulnerability as output_dir is under
# our control.
os.system('find {} -iname "*opus" > playlist_loooooops.m3u'
          ' && kill -s HUP `pidof ezstream`'.format(output_dir))