rra
3 years ago
commit
9a17d4c8c0
5 changed files with 202 additions and 0 deletions
@ -0,0 +1,3 @@ |
|||||
|
config_hashtag_bot.py |
||||
|
*.secret |
||||
|
__pycache__/* |
@ -0,0 +1,30 @@ |
|||||
|
# lumbung.space hashtag publishing bot |
||||
|
|
||||
|
This script makes [Hugo page bundles](https://gohugo.io/content-management/page-bundles/) out of Hashtag feeds on a Mastodon Hometown or Glitchsoc instance. |
||||
|
|
||||
|
## Install requirements |
||||
|
|
||||
|
`pip3 install Mastodon.py jinja2` |
||||
|
|
||||
|
## Setup |
||||
|
|
||||
|
This script requires access to an account on said Mastodon instance. This instance and the credentials can be set in `config_hashtag_bot.py`. |
||||
|
|
||||
|
If it is the first time you are running the script, you need to register the application on the Mastodon instance. Have a look at the [Mastodon.py documentation](https://mastodonpy.readthedocs.io/en/stable/#module-mastodon) for how to do that. |
||||
|
|
||||
|
This bot only uses read permissions. |
||||
|
|
||||
|
Set which hashtags you want to publish by adding them to the list `hashtags` in `config_hashtag_bot.py`. Omit the '#'. |
||||
|
|
||||
|
## What it does |
||||
|
|
||||
|
* The Bot only looks at the **local timeline** for posts under each hashtag configured in `config_hashtag_bot.py`. |
||||
|
* This means posts need to be **public** or directly addressed to the bot |
||||
|
* This script respects the mental model of 'local only' posts, in the sense that people do not expect them to appear elsewhere, so **local-only posts are ignored**. |
||||
|
* It only picks up posts with media attached, and of that media only images are used. |
||||
|
|
||||
|
## What it doesn't do |
||||
|
|
||||
|
* Different types of media or embeds |
||||
|
* No thread recreation, each post is treated as a top level post |
||||
|
|
@ -0,0 +1,18 @@ |
|||||
|
import os |
||||
|
|
||||
|
# Base URL of the Mastodon instance the bot logs in to
instance = "https://social.lumbung.space"

# n.b. if it is the first time you use this script
# you need to register the app first, see:
# https://mastodonpy.readthedocs.io/en/stable/#module-mastodon

# Login credentials for the bot account
email = ""
password = ""

# Hashtags to publish, written without the leading '#'
hashtags = ["jalansesama"]

# Your Hugo content directory; the OUTPUT_DIR environment variable,
# when set, takes precedence over the fallback path given here
output_dir = os.getenv("OUTPUT_DIR", "path/to/hugo/content")
@ -0,0 +1,14 @@ |
|||||
|
---
date: "{{ post_metadata.created_at }}" #2021-06-10T10:46:33+02:00
draft: false
author: "{{ post_metadata.account.display_name }}"
avatar: "{{ post_metadata.account.avatar }}"
categories: ["shouts"]
tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}]
---

{# Only attachments of type 'image' are downloaded into the page bundle, so only those are rendered. The alt text falls back to an empty string when no description was given and is HTML-escaped so quotes cannot break the attribute. #}
{% for item in post_metadata.media_attachments if item.type == 'image' %}
<img src="{{item.url | localize_media_url }}" alt="{{ (item.description or '') | e }}">
{% endfor %}

{{ post_metadata.content | filter_mastodon_urls }}
@ -0,0 +1,137 @@ |
|||||
|
# lumbung.space hashtag publishing bot |
||||
|
# © 2021 roel roscam abbing agplv3 |
||||
|
# Makes Hugo posts out of hashtag feeds on Mastodon. |
||||
|
# Requires an account on the Mastodon instance configured. |
||||
|
# Currently does not do any thread recreation and only handles images |
||||
|
|
||||
|
import os |
||||
|
import requests |
||||
|
import shutil |
||||
|
|
||||
|
import jinja2 |
||||
|
|
||||
|
from mastodon import Mastodon |
||||
|
import config_hashtag_bot |
||||
|
|
||||
|
def login_mastodon_bot():
    """
    Log the bot account in on the configured instance and return the client.

    The instance URL, email and password come from config_hashtag_bot.
    'publishbot_clientcred.secret' is passed as the client id and
    'publishbot_usercred.secret' as the file the login is saved to.
    Only the 'read' scope is requested.
    """
    client = Mastodon(
        client_id='publishbot_clientcred.secret',
        api_base_url=config_hashtag_bot.instance,
    )

    client.log_in(
        config_hashtag_bot.email,
        config_hashtag_bot.password,
        scopes=['read'],
        to_file='publishbot_usercred.secret',
    )

    return client
||||
|
|
||||
|
def create_frontmatter(post_metadata):
    """
    Placeholder for turning post metadata into HUGO frontmatter.

    The argument is currently ignored and an empty string is always returned.
    """
    return ""
||||
|
|
||||
|
def download_media(post_directory, media_attachments):
    """
    Download media attached to posts. N.b. currently only images
    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts

    post_directory: directory of the post the images are saved into.
    media_attachments: iterable of media dicts; each needs a 'type' and,
    for images, a 'url'.

    Images that already exist in post_directory are not downloaded again.
    A failed request is reported and skipped so that one broken attachment
    does not abort the whole run and no error page is saved as an image.
    """
    for item in media_attachments:
        if item['type'] != 'image':
            continue

        image = localize_media_url(item['url'])
        image_path = os.path.join(post_directory, image)

        #TODO check whether this needs to handle delete & redraft with different images
        if os.path.exists(image_path):
            continue

        try:
            # The context manager releases the connection; the timeout keeps
            # an unresponsive server from hanging the bot forever.
            with requests.get(item['url'], stream=True, timeout=60) as response:
                response.raise_for_status()
                # response.raw is the undecoded stream: ask urllib3 to undo
                # gzip/deflate so the file holds the actual image bytes.
                response.raw.decode_content = True
                with open(image_path, 'wb') as img_file:
                    shutil.copyfileobj(response.raw, img_file)
        except requests.RequestException as error:
            print('Failed to download image', image, error)
            continue

        print('Downloaded cover image', image)
||||
|
|
||||
|
def create_post(post_directory, post_metadata):
    """
    Create Hugo posts based on Toots/posts returned in timeline.
    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts

    post_directory: directory of the page bundle; created when missing.
    post_metadata: toot dict, rendered through the module-level template;
    its 'media_attachments' are downloaded next to the rendered page.
    """
    # Render before touching the disk, so a template error does not leave
    # an empty index.html behind.
    post = template.render(post_metadata=post_metadata)

    os.makedirs(post_directory, exist_ok=True)

    # Posts regularly contain non-ASCII text; do not depend on the
    # platform's default encoding.
    with open(os.path.join(post_directory, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(post)

    download_media(post_directory, post_metadata['media_attachments'])
||||
|
|
||||
|
def localize_media_url(url):
    """
    Returns the filename, used also as custom jinja filter

    The filename is the last segment of the URL path. A query string or
    fragment is not part of it, so the name written to disk and the name
    referenced from the rendered page always match. A URL ending in '/'
    gives an empty string.
    """
    from urllib.parse import urlsplit

    return urlsplit(url).path.split('/')[-1]
||||
|
|
||||
|
|
||||
|
def filter_mastodon_urls(content):
    """
    Filters out Mastodon generated URLS for tags
    e.g. <a href="https://social.lumbung.space/tags/jalankita" class="mention hashtag" rel="tag">
    Used also as custom jinja filter

    Not implemented yet: the content is currently returned unchanged.
    """
    #TODO implement the filtering described above
    return content
||||
|
|
||||
|
|
||||
|
mastodon = login_mastodon_bot()

output_dir = config_hashtag_bot.output_dir

# Templates are looked up relative to the current working directory
env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.curdir)
)

env.filters['localize_media_url'] = localize_media_url
env.filters['filter_mastodon_urls'] = filter_mastodon_urls

template = env.get_template('post_template.md')

# makedirs also creates missing parent directories, which a nested
# output path such as the default 'path/to/hugo/content' needs
os.makedirs(output_dir, exist_ok=True)

for hashtag in config_hashtag_bot.hashtags:

    hashtag_dir = os.path.join(output_dir, hashtag)
    os.makedirs(hashtag_dir, exist_ok=True)

    # Every post is a directory named after its id. Plain files in the
    # hashtag directory (e.g. a section index) are not posts and must
    # never end up in the clean-up below.
    existing_posts = {
        entry for entry in os.listdir(hashtag_dir)
        if os.path.isdir(os.path.join(hashtag_dir, entry))
    }

    timeline = mastodon.timeline_hashtag(hashtag, local=True, only_media=True) #returns max 20 queries and only with media
    timeline = mastodon.fetch_remaining(timeline) #returns all the rest n.b. can take a while because of rate limit

    for post_metadata in timeline:
        post_id = str(post_metadata['id'])
        post_dir = os.path.join(hashtag_dir, post_id)

        if post_id in existing_posts:
            # we already have the post: keep it, possibly update it later
            # TODO: update_post(post_dir, post_metadata)
            existing_posts.discard(post_id) # what remains was not returned in the feed
        elif not post_metadata.get('local_only', False):
            # 'local_only' is a Hometown / Glitch feature; vanilla Mastodon
            # does not send the key, in which case the post is published
            create_post(post_dir, post_metadata)

    # rm posts that exist locally but are no longer returned in the feed
    for post in sorted(existing_posts):
        print('deleted', post)
        shutil.rmtree(os.path.join(hashtag_dir, post))
||||
|
|
||||
|
|
||||
|
|
Loading…
Reference in new issue