commit 9a17d4c8c010d87073f5d29798a4c48716ea11d9 Author: rra Date: Sat Nov 6 17:17:33 2021 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8afa646 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +config_hashtag_bot.py +*.secret +__pycache__/* diff --git a/README.md b/README.md new file mode 100644 index 0000000..618a3ac --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# lumbung.space hashtag publishing bot + +This script makes [Hugo page bundles](https://gohugo.io/content-management/page-bundles/) out of Hashtag feeds on a Mastodon Hometown or Glitchsoc instance. + +## Install requirements + +`pip3 install Mastodon.py jinja2` + +## Setup + +This script requires access to an account on said Mastodon instance. This instance and the credentials can be set in `config_hashtag_bot.py`. + +If it is the first time you are running the script, you need to register the application on the Mastodon instance. Have a look at the [Mastodon.py documentation](https://mastodonpy.readthedocs.io/en/stable/#module-mastodon) for how to do that. + +This bot only uses read permissions. + +Set which hashtags you want to publish by adding them to the list `hashtags` in `config_hashtag_bot.py`. Omit the '#'. + +## What it does + +* The Bot only looks at the **local timeline** for posts under each hashtag configured in `config_hashtag_bot.py`. +* This means posts need to be **public** or directly addressed to the bot +* This script respects the mental model of 'local only' posts in the sense that people do not expect them to appear elsewhere. 
So **local only posts are ignored** +* It takes only posts with Media attached and then only those with images + +## What it doesn't do + +* Different types of media or embeds +* No thread recreation, each post is treated as a top level post + diff --git a/config_hashtag_bot.py b/config_hashtag_bot.py new file mode 100644 index 0000000..1201115 --- /dev/null +++ b/config_hashtag_bot.py @@ -0,0 +1,18 @@ +import os + +# Which instance to login to +instance = 'https://social.lumbung.space' + +# n.b. if it is the first time you use this script +# You need to register the app: +# https://mastodonpy.readthedocs.io/en/stable/#module-mastodon + +# Login credentials for bot account +email = '' +password = '' + +# Which hashtags to publish +hashtags = ['jalansesama'] + +# your Hugo content directory +output_dir = os.environ.get('OUTPUT_DIR', 'path/to/hugo/content') diff --git a/post_template.md b/post_template.md new file mode 100644 index 0000000..6aeff3e --- /dev/null +++ b/post_template.md @@ -0,0 +1,14 @@ +--- +date: "{{ post_metadata.created_at }}" #2021-06-10T10:46:33+02:00 +draft: false +author: "{{ post_metadata.account.display_name }}" +avatar: "{{ post_metadata.account.avatar }}" +categories: ["shouts"] +tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] +--- + +{% for item in post_metadata.media_attachments %} +{{item.description}} +{% endfor %} + +{{ post_metadata.content | filter_mastodon_urls }} \ No newline at end of file diff --git a/publish_hashtags.py b/publish_hashtags.py new file mode 100644 index 0000000..09e09d7 --- /dev/null +++ b/publish_hashtags.py @@ -0,0 +1,137 @@ +# lumbung.space hashtag publishing bot +# © 2021 roel roscam abbing agplv3 +# Makes Hugo posts out of hashtag feeds on Mastodon. +# Requires an account on the Mastodon instance configured. 
# Currently does not do any thread recreation and only handles images


def login_mastodon_bot():
    """
    Log the bot account in and return the Mastodon API client.

    The instance URL, e-mail address and password are read from
    config_hashtag_bot. 'publishbot_clientcred.secret' is passed as the
    client id (presumably the file written when the app was registered
    on the instance -- confirm against the Mastodon.py documentation).
    log_in() is asked to store the user credentials in
    'publishbot_usercred.secret' and to request only the 'read' scope.
    """
    mastodon = Mastodon(
        client_id='publishbot_clientcred.secret',
        api_base_url=config_hashtag_bot.instance,
    )

    mastodon.log_in(
        config_hashtag_bot.email,
        config_hashtag_bot.password,
        to_file='publishbot_usercred.secret',
        scopes=['read'],
    )

    return mastodon


def create_frontmatter(post_metadata):
    """
    Parse post metadata and return it as HUGO frontmatter

    Placeholder: post_metadata is currently ignored and an empty string
    is always returned.
    """
    return ""


def download_media(post_directory, media_attachments, timeout=30):
    """
    Download media attached to posts. N.b. currently only images
    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts

    post_directory: existing directory the files are saved into.
    media_attachments: iterable of media dicts, each with a 'type' and a
        'url' key. None or an empty iterable means nothing to do.
    timeout: seconds to wait for the server before requests gives up.

    A file that already exists in post_directory is not fetched again.
    A download the server answers with an error status is reported and
    skipped, so no broken file is left behind and the next run retries.
    """
    for item in media_attachments or ():
        if item['type'] != 'image':
            continue

        image = localize_media_url(item['url'])
        target = os.path.join(post_directory, image)
        #TODO check whether this needs to handle delete & redraft with different images
        if os.path.exists(target):
            continue

        # Write to a temporary name first: an interrupted download must
        # not leave a truncated file that the exists-check above would
        # then treat as complete forever.
        partial = target + '.part'
        with requests.get(item['url'], stream=True, timeout=timeout) as response:
            if not response.ok:
                # Do not store an HTTP error page as if it were the image.
                print('Could not download image', image, response.status_code)
                continue
            # Have urllib3 undo gzip/deflate transfer encoding; the raw
            # stream is otherwise copied still compressed.
            response.raw.decode_content = True
            with open(partial, 'wb') as img_file:
                shutil.copyfileobj(response.raw, img_file)
        os.replace(partial, target)
        print('Downloaded cover image', image)


def create_post(post_directory, post_metadata):
    """
    Create Hugo posts based on Toots/posts returned in timeline.
    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts

    post_directory: directory of the page bundle, created when missing.
    post_metadata: toot dict; it is handed to the template as
        'post_metadata' and its 'media_attachments' are downloaded.

    Relies on the module-level name 'template', which is defined
    elsewhere in the file.
    """
    # exist_ok avoids the check-then-create race and also creates
    # missing parent directories.
    os.makedirs(post_directory, exist_ok=True)

    # Render before opening the file so a template error does not leave
    # an empty index.html behind.
    post = template.render(post_metadata=post_metadata)

    # Toots routinely contain non-ASCII text; do not depend on the
    # locale's default encoding.
    with open(os.path.join(post_directory, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(post)

    download_media(post_directory, post_metadata['media_attachments'])


def localize_media_url(url):
    """
    Returns the filename, used also as custom jinja filter

    Only the last segment of the URL path is returned; a query string
    or fragment is dropped so it cannot end up in the local filename.
    """
    from urllib.parse import urlsplit

    return urlsplit(url).path.split('/')[-1]