#!/usr/bin/env python3
import requests
from time import sleep
import datetime, shutil, os
from urllib.parse import urlparse
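
# Which year's #looptober posts we collect, where they go, and the
# Opus bitrate used when transcoding.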
year = "2023"
output_dir = os.path.join("loooooops", year)
bitrate = "128k"
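
# Transcode a downloaded attachment to loudness-normalised stereo Opus.
# We fork() and exec ffmpeg directly; the parent wait()s for the child,
# and 'metadata' is the per-post record assembled in 'looooops' below.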
def transcode_media(path, media_item, metadata):
    infile = os.path.join(path, media_item)
    outfile = os.path.join(path, media_item + ".opus")
    if not os.path.exists(outfile):
        print("transcoding to {}".format(outfile))
        pid = os.fork()
        if pid == 0:
            artist = metadata["creator"]
            title = metadata["url"]
            comment = metadata["description"]  # collected but not yet written to the tags
            date = metadata["date"]
            os.execlp("ffmpeg", "ffmpeg", "-hide_banner", "-loglevel", "error",
                      "-i", infile, "-map_metadata", "-1",
                      "-metadata", "artist={}".format(artist),
                      "-metadata", "title={}".format(title),
                      "-metadata", "creation_time={}".format(date),
                      "-map_chapters", "-1", "-ac", "2",
                      "-af", "loudnorm=dual_mono=true",
                      "-b:a", bitrate, "-y", outfile)
            # never reached, execlp replaces the child process image
        else:
            os.wait()
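
# Fetch a single media attachment into 'path'. Files that already exist
# on disk are skipped. Returns the local filename on success and None
# otherwise (connection errors are printed and waited out).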
def grab_media(path, url):
    try:
        media_item = urlparse(url).path.split('/')[-1]
        headers = {
            'User-Agent': 'https://git.vvvvvvaria.org/rra/radio-looptober',
            'From': 'post.lurk.org/@lurk'  # This is another valid field
        }
        if os.path.exists(os.path.join(path, media_item)):
            return media_item
        else:
            response = requests.get(url, headers=headers, stream=True)
            if response.ok:
                with open(os.path.join(path, media_item), 'wb') as media_file:
                    shutil.copyfileobj(response.raw, media_file)
                print('Downloaded media {} from {}'.format(
                    media_item, urlparse(url).netloc))
                return media_item
    except requests.exceptions.ConnectionError as e:
        # maybe transient network issues
        print(e)
        sleep(60)

# This pages through the whole #looptober tag timeline and collects
# the JSON in 'data'.
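# Mastodon paginates timeline requests via Link headers, which requests
# exposes as r.links.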
there_is_more = True
url = "https://post.lurk.org/api/v1/timelines/tag/looptober"
data = []

while there_is_more:
    print("downloading", url)
    r = requests.get(url)
    print("response status: ", r.status_code)
    if r.ok:
        if r.content:
            data.append(r.json())
            print("amount of pages:", len(data))
            sleep(0.5)
        if "next" in r.links:  # the final page may carry only a 'prev' link
            url = r.links["next"]["url"]
            print("found next url", url)
        else:
            print("no more data")
            there_is_more = False
            break
    else:
        break

# This parses all the json, takes a few valuable fields and puts them
# in 'looooops'.
looooops = []
for collection in data:
    for i in collection:
        if i["media_attachments"]:  # we only take entries that actually contain a sound file
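            # 'created_at' is ISO 8601 with a trailing 'Z';
            # datetime.fromisoformat() only accepts that suffix from
            # Python 3.11 on, hence the [:-1] strip below.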
            creation_date = datetime.datetime.fromisoformat(
                i['created_at'][:-1]).astimezone(
                    datetime.timezone.utc)
            if creation_date.strftime('%Y') == year:  # we only take entries from this year
                stuff = {}
                stuff["url"] = i["url"]
                stuff["description"] = i["content"]
                stuff["audio"] = i["media_attachments"]
                stuff["date"] = i["created_at"]
                stuff["id"] = i["id"]
                stuff["creator"] = i["account"]["username"]
                looooops.append(stuff)
                print("found post by {} with {} looops".format(
                    i["account"]["username"],
                    len(i["media_attachments"])))
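
# Download every attachment of every loop into its own
# <creator>_<id> directory, then transcode whatever we got.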
for l in looooops:
    path = os.path.join(output_dir, "{}_{}".format(l['creator'], l['id']))
    os.makedirs(path, exist_ok=True)
    print("\n")
    print("Downloading looops by ***{}***".format(l['creator']))
    for a in l['audio']:
        if a['remote_url']:
            url = a['remote_url']
        else:
            url = a['url']
        media_item = grab_media(path, url)
        if media_item:
            transcode_media(path, media_item, l)

# Once we've done everything we generate a playlist and ask ezstream
# to reload it.
# This is not an injection vulnerability as output_dir is under
# our control.
os.system('find {} -iname "*opus" > playlist_loooooops.m3u'
          ' && kill -s HUP `pidof ezstream`'.format(output_dir))