rra committed 3 years ago
commit 6f77863469
3 changed files with 218 additions and 0 deletions
feeds_list.txt
@ -0,0 +1,12 @@
https://www.masartemasaccion.org/feed/
https://fafswag.wordpress.com/feed/
https://wajukuuarts.wordpress.com/feed/
https://sakakini.org/feed/
https://inland.org/feed/
https://jatiwangiartfactory.tumblr.com/feed/
https://brittoartstrust.org/feed/
https://artivismo.org/feed/
http://www.festivalsegou.org/spip.php?page=backend&lang=fr
https://gudskul.art/feed/
https://projectartworks.org/feed/
https://ruangrupa.id/feed/
post_template.md
@ -0,0 +1,11 @@
---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}" # e.g. 2021-06-10T10:46:33+02:00
draft: false
summary: "{{ frontmatter.summary }}"
author: "{{ frontmatter.author }}"
original_link: "{{ frontmatter.original_link }}"

---

{{ content }}
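(For reference: rendered by the script below, a post from this template comes out roughly like the following. The field values are illustrative, not taken from a real feed.)

---
title: "Example post title"
date: "2021-06-10T10:46:33+02:00"
draft: false
summary: ""
author: "Example Author"
original_link: "https://example.org/example-post/"

---

<p>Example post body.</p>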
@ -0,0 +1,195 @@
#!/usr/bin/env python3

# lumbung.space rss feed aggregator
# © 2021 roel roscam abbing, GPLv3

import os
import shutil
import time
from ast import literal_eval as make_tuple
from urllib.parse import urlparse

import arrow
import feedparser
import jinja2
import requests
from bs4 import BeautifulSoup
from slugify import slugify

def write_etag(feed_name, feed_data):
    """
    Save the etag and last-modified timestamp of a feed,
    so the next run can make a conditional request.
    """
    etag = ''
    modified = ''

    if 'etag' in feed_data:
        etag = feed_data.etag
    if 'modified' in feed_data:
        modified = feed_data.modified

    if etag or modified:
        with open(os.path.join('etags', feed_name + '.txt'), 'w') as f:
            f.write(str((etag, modified)))


def get_etag(feed_name):
    """
    Return the stored etag and last-modified timestamp of a feed.
    """
    fn = os.path.join('etags', feed_name + '.txt')
    etag = ''
    modified = ''

    if os.path.exists(fn):
        with open(fn, 'r') as f:
            etag, modified = make_tuple(f.read())

    return etag, modified

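# Together these two helpers let the fetch loop below make conditional
# HTTP requests. A sketch of the round-trip (illustrative, not part of
# the original flow):
#
#   data = feedparser.parse(feed_url)             # first, unconditional fetch
#   write_etag(feed_name, data)                   # cache the validators
#   etag, modified = get_etag(feed_name)
#   data = feedparser.parse(feed_url, etag=etag)  # later, conditional fetch
#   # data.status == 304 means the feed is unchanged and entries are empty
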
def create_frontmatter(entry):
    """
    Parse RSS entry metadata and return it as frontmatter.
    """
    # prefer the updated date over the published date when both exist;
    # fall back to the current time if the entry carries neither
    published = None
    if 'published' in entry:
        published = entry.published_parsed
    if 'updated' in entry:
        published = entry.updated_parsed

    published = arrow.get(published) if published else arrow.now()

    frontmatter = {
        'title': entry.title,
        'date': published.format(),
        'summary': '',
        'author': entry.get('author', ''),
        'original_link': entry.link
    }

    return frontmatter

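# For a typical entry, create_frontmatter() returns something like
# (values illustrative):
#
#   {
#       'title': 'Example post title',
#       'date': '2021-06-10T10:46:33+02:00',
#       'summary': '',
#       'author': 'Example Author',
#       'original_link': 'https://example.org/example-post/'
#   }
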
def create_post(post_dir, entry):
    """
    Write a Hugo post based on an RSS entry.
    """
    frontmatter = create_frontmatter(entry)

    if not os.path.exists(post_dir):
        os.makedirs(post_dir)

    # some feeds only ship a summary, not the full content
    if 'content' in entry:
        post_content = entry.content[0].value
    else:
        post_content = entry.summary

    parsed_content = parse_posts(post_dir, post_content)

    with open(os.path.join(post_dir, 'index.html'), 'w') as f:
        post = template.render(frontmatter=frontmatter, content=parsed_content)
        f.write(post)
        print('created post for', entry.title, '({})'.format(entry.link))

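# Each post ends up in its own leaf directory, e.g. (illustrative):
#
#   <output_dir>/ruangrupa.id/example-post-title/index.html
#
# alongside any media files grab_media() below manages to download.
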
def grab_media(post_directory, url):
    """
    Download media linked in the post so we have a local copy.
    If the download succeeds, return the new local path, otherwise return the url.
    """
    image = urlparse(url).path.split('/')[-1]

    try:
        # TODO: with stream=True we could inspect the response headers
        # (mimetype etc.) before committing to the download
        response = requests.get(url, stream=True)
    except Exception as e:
        print(e)
        return url

    try:
        if not os.path.exists(os.path.join(post_directory, image)):
            with open(os.path.join(post_directory, image), 'wb') as img_file:
                shutil.copyfileobj(response.raw, img_file)
            print('Downloaded cover image', image)
        return image

    except Exception as e:
        print('Failed to download cover image', url)
        print(e)
        return url

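# One way to act on the TODO in grab_media() would be to check the
# response headers before writing anything (a sketch, not in the
# original script):
#
#   content_type = response.headers.get('Content-Type', '')
#   if not content_type.startswith(('image/', 'audio/', 'video/')):
#       return url  # skip anything that isn't media
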
def parse_posts(post_dir, post_content):
    """
    Parse the post content for media items and
    replace remote media items with the local copy.
    """
    soup = BeautifulSoup(post_content, "html.parser")

    for img in soup(['img', 'object']):
        # skip tags that carry no source attribute
        if not img.get('src'):
            continue

        local_image = grab_media(post_dir, img['src'])

        if img['src'] != local_image:
            print(img['src'], '->', local_image)
            img['src'] = local_image

    return soup.decode()

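# parse_posts() rewrites remote media references to the local copies,
# e.g. (illustrative):
#
#   <img src="https://example.org/uploads/photo.jpg">
#   becomes
#   <img src="photo.jpg">
#
# so each Hugo leaf bundle is self-contained.
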
feed_urls = open('feeds_list.txt', 'r').read().splitlines()

start = time.time()

if not os.path.exists('etags'):
    os.mkdir('etags')


env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.curdir)
)

output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/posts/')
#output_dir = os.environ.get('OUTPUT_DIR', 'network/')

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

template = env.get_template('post_template.md')

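# The output location can be overridden per run via the environment,
# e.g. (illustrative, <script> stands in for this file's name):
#
#   OUTPUT_DIR=network/ python3 <script>
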
for feed_url in feed_urls:
    feed_name = urlparse(feed_url).netloc

    # make a conditional request when we have cached validators
    etag, modified = get_etag(feed_name)
    if modified:
        data = feedparser.parse(feed_url, modified=modified)
    elif etag:
        data = feedparser.parse(feed_url, etag=etag)
    else:
        data = feedparser.parse(feed_url)

    print(data.get('status'), feed_url)

    if data.get('status') == 200:
        write_etag(feed_name, data)

        for entry in data.entries:
            post_dir = os.path.join(output_dir, feed_name, slugify(entry.title))
            create_post(post_dir, entry)

end = time.time()

# report total runtime in seconds
print(end - start)