rra
3 years ago
commit
6f77863469
3 changed files with 218 additions and 0 deletions
@ -0,0 +1,12 @@ |
|||
https://www.masartemasaccion.org/feed/ |
|||
https://fafswag.wordpress.com/feed/ |
|||
https://wajukuuarts.wordpress.com/feed/ |
|||
https://sakakini.org/feed/ |
|||
https://inland.org/feed/ |
|||
https://jatiwangiartfactory.tumblr.com/feed/ |
|||
https://brittoartstrust.org/feed/ |
|||
https://artivismo.org/feed/ |
|||
http://www.festivalsegou.org/spip.php?page=backend&lang=fr |
|||
https://gudskul.art/feed/ |
|||
https://projectartworks.org/feed/ |
|||
https://ruangrupa.id/feed/ |
@ -0,0 +1,11 @@ |
|||
--- |
|||
title: "{{ frontmatter.title }}" |
|||
date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 |
|||
draft: false |
|||
summary: "{{ frontmatter.summary }}" |
|||
author: "{{ frontmatter.author }}" |
|||
original_link: "{{ frontmatter.link }}" |
|||
|
|||
--- |
|||
|
|||
{{ content }} |
@ -0,0 +1,195 @@ |
|||
#!/bin/python3 |
|||
|
|||
#lumbung.space rss feed aggregator |
|||
#© 2021 roel roscam abbing gplv3 etc |
|||
|
|||
import requests |
|||
import jinja2 |
|||
import os |
|||
import shutil |
|||
import feedparser |
|||
from urllib.parse import urlparse |
|||
from ast import literal_eval as make_tuple |
|||
from slugify import slugify |
|||
from bs4 import BeautifulSoup |
|||
import time |
|||
import arrow |
|||
|
|||
|
|||
def write_etag(feed_name, feed_data):
    """
    Persist a feed's HTTP cache validators to etags/<feed_name>.txt.

    The file stores a stringified (etag, modified) tuple which
    get_etag() reads back with ast.literal_eval, so the two functions
    must agree on this format.

    feed_name -- basename for the cache file (the feed's hostname)
    feed_data -- a feedparser result; may expose .etag and/or .modified
    """
    etag = ''
    modified = ''

    # BUGFIX: the original read the global `data` instead of the
    # `feed_data` argument, so the function only worked by accident when
    # called from the main loop.
    if 'etag' in feed_data:
        etag = feed_data.etag
    if 'modified' in feed_data:
        modified = feed_data.modified

    # Only write a cache file when the server gave us something cacheable.
    if etag or modified:
        with open(os.path.join('etags', feed_name + '.txt'), 'w') as f:
            f.write(str((etag, modified)))
|||
|
|||
def get_etag(feed_name):
    """
    Return the cached (etag, modified) validators for a feed.

    Counterpart of write_etag(): reads etags/<feed_name>.txt and parses
    the stored tuple with ast.literal_eval (imported as make_tuple).
    Returns ('', '') when no cache file exists.
    """
    fn = os.path.join('etags', feed_name + '.txt')
    etag = ''
    modified = ''

    if os.path.exists(fn):
        # FIX: use a context manager so the file handle is closed;
        # the original leaked it via open(fn, 'r').read().
        with open(fn, 'r') as f:
            etag, modified = make_tuple(f.read())

    return etag, modified
|||
|
|||
def create_frontmatter(entry):
    """
    Parse RSS entry metadata and return it as a dict for the Hugo
    frontmatter template.

    Keeps the original precedence: an 'updated' timestamp overwrites
    'published' when both are present.

    FIX: the original raised NameError when an entry carried neither
    'published' nor 'updated', and AttributeError when it had no author.
    """
    published = None
    if 'published' in entry:
        published = entry.published_parsed
    if 'updated' in entry:
        published = entry.updated_parsed

    # Fall back to "now" for feeds that ship entries without any date.
    published = arrow.get(published) if published else arrow.now()

    frontmatter = {
        'title': entry.title,
        'date': published.format(),
        'summary': '',
        # .get() keeps us safe on feeds that omit the author element
        'author': entry.get('author', ''),
        'original_link': entry.link
    }

    return frontmatter
|||
|
|||
def create_post(post_dir, entry):
    """
    Write a Hugo post (index.html) for a single RSS entry.

    post_dir -- directory for this post; created if missing
    entry    -- a feedparser entry

    Media referenced in the entry body is localized via parse_posts()
    before rendering with the module-level jinja2 `template`.
    """
    frontmatter = create_frontmatter(entry)

    # exist_ok avoids the racy exists()/makedirs() pair of the original
    os.makedirs(post_dir, exist_ok=True)

    # FIX: not every feed puts the body in entry.content; fall back to
    # the summary so such feeds no longer crash the whole run.
    if 'content' in entry:
        post_content = entry.content[0].value
    else:
        post_content = entry.get('summary', '')

    parsed_content = parse_posts(post_dir, post_content)

    with open(os.path.join(post_dir, 'index.html'), 'w') as f:
        post = template.render(frontmatter=frontmatter, content=parsed_content)
        f.write(post)
        print('created post for', entry.title, '({})'.format(entry.link))
|||
|
|||
def grab_media(post_directory, url):
    """
    Download a media item linked in a post to keep a local copy.

    Returns the local filename when the download succeeds (or the file
    already exists), otherwise returns the original url so the post
    keeps pointing at the remote copy.
    """
    image = urlparse(url).path.split('/')[-1]

    # FIX: a URL ending in '/' yields an empty filename; don't try to
    # write a file named '' in that case.
    if not image:
        return url

    try:
        # TODO: stream=True means we could inspect the headers
        # (mimetype, size) before committing to the download.
        response = requests.get(url, stream=True)
        # FIX: don't save error pages (404 HTML etc.) as "images".
        response.raise_for_status()
    except Exception as e:
        print(e)
        return url

    try:
        local_path = os.path.join(post_directory, image)
        if not os.path.exists(local_path):
            with open(local_path, 'wb') as img_file:
                shutil.copyfileobj(response.raw, img_file)
            print('Downloaded cover image', image)
        return image

    except Exception as e:
        print('Failed to download cover image', url)
        print(e)
        return url
|||
|
|||
|
|||
def parse_posts(post_direntry, post_content):
    """
    Scan post HTML for media items and swap each remote src for a local
    copy downloaded into the post's directory.

    post_direntry -- directory of the post being built
    post_content  -- raw HTML string of the post body

    Returns the (possibly rewritten) HTML.
    """
    soup = BeautifulSoup(post_content, "html.parser")

    # (dropped the original's unused `media = []` accumulator)
    for img in soup(['img', 'object']):
        # FIX: <object> tags (and malformed <img>) may carry no src
        # attribute; skip them instead of raising KeyError.
        if not img.has_attr('src'):
            continue

        # BUGFIX: the original ignored its directory parameter and read
        # the global `post_dir`, so it only worked because the main loop
        # happened to set that global to the same value.
        local_image = grab_media(post_direntry, img['src'])

        if img['src'] != local_image:
            print(img['src'], '->', local_image)
            img['src'] = local_image

    return soup.decode()
|||
|
|||
|
|||
# ---- main script ---------------------------------------------------------

# FIX: read the feed list with a context manager (original leaked the handle)
with open('feeds_list.txt', 'r') as f:
    feed_urls = f.read().splitlines()

start = time.time()

# cache directory for HTTP etag / last-modified validators
if not os.path.exists('etags'):
    os.mkdir('etags')

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.curdir)
)

output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/posts/')

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

template = env.get_template('post_template.md')

# FIX: iterate over *all* feeds — the original's [7:] slice was a
# debugging leftover that silently skipped the first seven feeds.
for feed_url in feed_urls:
    feed_name = urlparse(feed_url).netloc

    # Use cached validators so unchanged feeds can answer 304 cheaply.
    etag, modified = get_etag(feed_name)
    if modified:
        data = feedparser.parse(feed_url, modified=modified)
    elif etag:
        data = feedparser.parse(feed_url, etag=etag)
    else:
        data = feedparser.parse(feed_url)

    # FIX: a network failure leaves the parse result without a status
    # attribute; guard so one dead feed doesn't abort the whole run.
    if 'status' not in data:
        print('error fetching', feed_url)
        continue

    print(data.status, feed_url)

    if data.status == 200:
        # FIX: actually record the validators — the call was commented
        # out, and was passed feed_url where get_etag() uses feed_name
        # as the cache key, so the cache could never take effect.
        write_etag(feed_name, data)

        for entry in data.entries:
            post_dir = os.path.join(output_dir, feed_name, slugify(entry.title))
            create_post(post_dir, entry)

end = time.time()

print(end - start)
|||
|
Loading…
Reference in new issue