initial commit

3 years ago · 9a17d4c8c0
5 changed files with 202 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+config_hashtag_bot.py
+*.secret
+__pycache__/*
--- a/README.md
+++ b/README.md
@ -0,0 +1,30 @@
+# lumbung.space hashtag publishing bot
+
+This script makes [Hugo page bundles](https://gohugo.io/content-management/page-bundles/) out of hashtag feeds on a Mastodon Hometown or Glitchsoc instance.
+
+## Install requirements
+
+`pip3 install Mastodon.py jinja2`
+
+## Setup
+
+This script requires access to an account on said Mastodon instance. This instance and the credentials can be set in `config_hashtag_bot.py`.
+
+If it is the first time you are running the script, you need to register the application on the Mastodon instance. Have a look at the [Mastodon.py documentation](https://mastodonpy.readthedocs.io/en/stable/#module-mastodon) for how to do that.
+
+This bot only uses read permissions.
+
+Set which hashtags you want to publish by adding them to the list `hashtags` in `config_hashtag_bot.py`. Omit the '#'.
+
+## What it does
+
+* The Bot only looks at the **local timeline** for posts under each hashtag configured in `config_hashtag_bot.py`.
+* This means posts need to be **public** or directly addressed to the bot
+* This script respects the mental model of 'local only' posts: people do not expect them to appear elsewhere, so **local only posts are ignored**.
+* It only takes posts with media attached, and of those only the ones with images.
+
+## What it doesn't do
+
+* Handle media types other than images, or embeds
+* No thread recreation, each post is treated as a top level post
+ 
--- a/config_hashtag_bot.py
+++ b/config_hashtag_bot.py
@ -0,0 +1,18 @@
+import os
+
+# Base URL of the Mastodon instance the bot logs in to
+instance = 'https://social.lumbung.space'
+
+# n.b. on the very first run the app has to be registered
+# with the instance, see:
+# https://mastodonpy.readthedocs.io/en/stable/#module-mastodon
+
+# Credentials of the bot account
+email = ''
+password = ''
+
+# Hashtags to publish, written without the leading '#'
+hashtags = ['jalansesama']
+
+# Hugo content directory, the OUTPUT_DIR environment variable
+# takes precedence over the placeholder path
+output_dir = os.getenv('OUTPUT_DIR', 'path/to/hugo/content')
--- a/post_template.md
+++ b/post_template.md
@ -0,0 +1,14 @@
+---
+date: "{{ post_metadata.created_at }}" #2021-06-10T10:46:33+02:00
+draft: false
+author: "{{ post_metadata.account.display_name }}"
+avatar: "{{ post_metadata.account.avatar }}"
+categories: ["shouts"]
+tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}]
+---
+{# only images are downloaded next to the post, so only images are embedded; the alt text is escaped and empty when there is no description #}
+{% for item in post_metadata.media_attachments if item.type == 'image' %}
+<img src="{{item.url | localize_media_url }}" alt="{{ item.description | default('', true) | e }}">
+{% endfor %}
+
+{{ post_metadata.content | filter_mastodon_urls }}
--- a/publish_hashtags.py
+++ b/publish_hashtags.py
@ -0,0 +1,137 @@
+# lumbung.space hashtag publishing bot
+# © 2021 roel roscam abbing agplv3
+# Makes Hugo posts out of hashtag feeds on Mastodon.
+# Requires an account on the Mastodon instance configured.
+# Currently does not do any thread recreation and only handles images
+
+import os
+import requests
+import shutil
+
+import jinja2
+
+from mastodon import Mastodon
+import config_hashtag_bot
+
+def login_mastodon_bot():
+    """
+    Log in to the configured instance with the bot account
+    and return the Mastodon client
+    """
+    # client_id is a credentials file (presumably written when the app was registered)
+    client = Mastodon(
+        api_base_url=config_hashtag_bot.instance,
+        client_id='publishbot_clientcred.secret',
+    )
+
+    # only the read scope is requested, the user token is saved to a .secret file
+    client.log_in(
+        config_hashtag_bot.email,
+        config_hashtag_bot.password,
+        scopes=['read'],
+        to_file='publishbot_usercred.secret',
+    )
+
+    return client
+
+def create_frontmatter(post_metadata):
+    """
+    Turn post metadata into HUGO frontmatter.
+
+    Placeholder: the metadata is not used yet, an empty string is returned
+    """
+    return ""
+
+def download_media(post_directory, media_attachments):
+    """
+    Download media attached to posts. N.b. currently only images
+    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts
+
+    post_directory: directory of the post, images are saved inside it
+    media_attachments: list of media dicts, each needs a 'type' and a 'url'
+
+    Images that already exist in post_directory are not downloaded again.
+    Raises requests.HTTPError when the server answers with an error status.
+    """
+
+    for item in media_attachments:
+        if item['type'] != 'image':
+            continue
+
+        image = localize_media_url(item['url'])
+        image_path = os.path.join(post_directory, image)
+
+        #TODO check whether this needs to handle delete & redraft with different images
+        if os.path.exists(image_path):
+            continue
+
+        # the timeout keeps a stalled server from hanging the whole run,
+        # the with block releases the connection when done
+        with requests.get(item['url'], stream=True, timeout=30) as response:
+            # do not save an error page under the image name
+            response.raise_for_status()
+            # decode gzip/deflate, otherwise the raw stream is written compressed
+            response.raw.decode_content = True
+            with open(image_path, 'wb') as img_file:
+                shutil.copyfileobj(response.raw, img_file)
+
+        print('Downloaded cover image', image)
+
+def create_post(post_directory, post_metadata):
+    """
+    Create Hugo posts based on Toots/posts returned in timeline.
+    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts
+
+    post_directory: directory for this post, created when missing
+    post_metadata: toot dict, rendered with the module level `template`
+
+    Writes index.html into post_directory and downloads the attached media.
+    """
+
+    # render first, so a template error does not leave an empty post behind
+    post = template.render(post_metadata=post_metadata)
+
+    os.makedirs(post_directory, exist_ok=True)
+
+    # posts can contain emoji and non-latin text, do not rely on the locale encoding
+    with open(os.path.join(post_directory, 'index.html'), 'w', encoding='utf-8') as f:
+        f.write(post)
+
+    download_media(post_directory, post_metadata['media_attachments'])
+
+def localize_media_url(url):
+    """
+    Returns the filename, used also as custom jinja filter
+
+    Query string and fragment are not part of the filename, e.g.
+    https://example.org/media/cat.png?1623314793 gives cat.png
+    """
+    # cut the '#fragment' first, then the '?query', so slashes in them are ignored
+    path = url.split('#', 1)[0].split('?', 1)[0]
+    return path.split('/')[-1]
+
+
+def filter_mastodon_urls(content):
+    """
+    Meant to strip the URLs Mastodon generates for tags, such as
+    <a href="https://social.lumbung.space/tags/jalankita" class="mention hashtag" rel="tag">
+    Also used as custom jinja filter
+
+    Not implemented yet: the content is handed back untouched
+    """
+    #TODO
+    unfiltered = content
+    return unfiltered
+
+
+# Script body: log in, load the post template, then sync one directory
+# per configured hashtag with the posts currently returned by its feed.
+mastodon = login_mastodon_bot()
+
+output_dir = config_hashtag_bot.output_dir
+
+
+env = jinja2.Environment(
+    loader=jinja2.FileSystemLoader(os.path.curdir)
+    )
+
+env.filters['localize_media_url'] = localize_media_url
+env.filters['filter_mastodon_urls'] = filter_mastodon_urls
+
+template = env.get_template('post_template.md')
+
+# makedirs also creates missing parents of a nested content path
+os.makedirs(output_dir, exist_ok=True)
+
+
+for hashtag in config_hashtag_bot.hashtags:
+
+    hashtag_dir = os.path.join(output_dir, hashtag)
+    os.makedirs(hashtag_dir, exist_ok=True)
+
+    # posts are directories named after the post id; plain files in the
+    # hashtag directory are not posts and must not end up in the delete list
+    existing_posts = {
+        entry for entry in os.listdir(hashtag_dir)
+        if os.path.isdir(os.path.join(hashtag_dir, entry))
+    }
+
+    timeline = mastodon.timeline_hashtag(hashtag, local=True, only_media=True) #returns max 20 queries and only with media
+    timeline = mastodon.fetch_remaining(timeline) #returns all the rest n.b. can take a while because of rate limit
+
+    for post_metadata in timeline:
+        post_id = str(post_metadata['id'])
+        post_dir = os.path.join(hashtag_dir, post_id)
+
+        if post_id in existing_posts:
+            # we already have the post: do nothing, possibly update
+            #update_post(post_dir, post_metadata)
+            # what stays in existing_posts was not returned in the feed
+            existing_posts.discard(post_id)
+
+        # 'local_only' is a Hometown or Glitch only field; when it is missing
+        # (vanilla Mastodon) the post is treated as not local only
+        elif not post_metadata.get('local_only', False):
+            create_post(post_dir, post_metadata)
+
+    #rm posts that exist but are no longer returned in feed
+    for post in sorted(existing_posts):
+        print('deleted', post)
+        shutil.rmtree(os.path.join(hashtag_dir, post))
+
+
+