# ltm-pelican-to-hugo-content-converter
# © 2022 Roel Roscam Abbing, released as GPLv3
#
# Converts a Pelican post directory structure to Hugo Page Bundles by:
#  - taking the Pelican post slug and creating a Hugo page bundle (a directory)
#  - taking the Pelican post and creating slug/index.lang.md based on it
#  - taking the Pelican post metadata and creating Hugo front matter for
#    slug/index.lang.md
#  - finding all media associated with the Pelican post and adding it to the
#    page bundle
#  - updating the links in index.lang.md to be relative to the files
#  - updating the references to other Pelican posts to other Hugo posts
#  - adding all translated versions of a Pelican post as slug/index.lang.md
#
# N.B. this tool will do 95% of the work but you will need to manually fix a
# few individual files.

import os
import re
import shutil
import sys

# the content dir of the pelican repo
base_content_dir = "/home/user/pelican-site/content"
# the posts dir of the pelican repo
post_dir = "/home/user/pelican-site/content/posts/"
# the posts dir of the hugo repo
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"

# You need to adapt this for your own use case:
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""

# Metadata fields that are carried over into the Hugo front matter.
_METADATA_FIELDS = (
    "title",
    "date",
    "author",
    "category",
    "tags",
    "slug",
    "lang",
    "summary",
    "status",
    "translator",
)
# Fields holding comma-separated values; they are turned into lists.
_LIST_FIELDS = ("tags", "author", "translator")
# Free-text fields that the template places inside double-quoted YAML strings.
_ESCAPED_FIELDS = ("title", "summary")

# A "Key: value" header line. The value is everything after the FIRST colon,
# so values that contain ": " themselves (e.g. titles) are kept whole.
_METADATA_LINE = re.compile(r"^([A-Za-z0-9_-]+):(.*)$")
# The target of a markdown link that points at a Pelican "{filename}" path.
_FILENAME_LINK = re.compile(r"\]\(\{filename\}([^)\s]*)")
# The target of a markdown link/image that points into /images/.
_IMAGE_LINK = re.compile(r"\]\((/images/[^)\s]*)")


def _escape_double_quoted(text):
    """Escape *text* for use inside a double-quoted YAML string."""
    # Backslashes first, otherwise the ones added for the quotes get doubled.
    return text.replace("\\", "\\\\").replace('"', '\\"')


def parse_front_matter(article):
    """Split a Pelican post into its metadata and its body.

    The metadata header looks like this:

        Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
        Date: 2010-03-25
        Author: Kris De Decker
        Category: Obsolete Technology
        Tags: human power
        Slug: history-of-human-powered-cranes
        Lang: en
        Summary: The only advantage that fossil-fuelled powered cranes ...
        Status: published

    Only the leading run of "Key: value" lines is treated as the header; it
    ends at the first blank line or the first line that is not of that form.
    Lines further down that merely look like metadata stay in the body.

    Args:
        article: the post as a list of lines without line endings.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` maps the lower-cased
        names of the recognised, non-empty fields to their values: ``tags``,
        ``author`` and ``translator`` are lists of stripped strings, ``title``
        and ``summary`` have backslashes and double quotes escaped, everything
        else is a stripped string. ``body`` is the rest of the article joined
        with newlines, with every header line removed.
    """
    lines = list(article)

    # Tolerate blank lines in front of the header.
    start = 0
    while start < len(lines) and not lines[start].strip():
        start += 1

    frontmatter = {}
    end = start
    while end < len(lines):
        match = _METADATA_LINE.match(lines[end])
        if match is None:
            break
        key = match.group(1).lower()
        value = match.group(2).strip()
        # Unknown or empty fields are dropped from the result, but they are
        # still header lines and must not leak into the body.
        if key in _METADATA_FIELDS and value:
            frontmatter[key] = value
        end += 1

    if end == start:
        # No header at all: hand back the article untouched.
        end = 0

    for key in _LIST_FIELDS:
        if key in frontmatter:
            items = [item.strip() for item in frontmatter[key].split(",")]
            items = [item for item in items if item]
            if items:
                frontmatter[key] = items
            else:
                del frontmatter[key]

    for key in _ESCAPED_FIELDS:
        if key in frontmatter:
            frontmatter[key] = _escape_double_quoted(frontmatter[key])

    return frontmatter, "\n".join(lines[end:])


def resolve_file_links(parsed_article, article):
    """Rewrite Pelican ``{filename}`` link targets as Hugo ``ref`` shortcodes.

    ``[About]({filename}/pages/about.md "About Us")`` becomes
    ``[About]({{< ref "/pages/about.md" >}} "About Us")``. Every such link in
    the text is rewritten, including several on one line. The path itself is
    copied over unchanged, so it still has to match a page on the Hugo side.

    Args:
        parsed_article: the article body as a single string.
        article: the article as a list of lines. Unused; kept so existing
            calls keep working.

    Returns:
        The body with all ``{filename}`` link targets rewritten.
    """

    def to_ref(match):
        # Plain concatenation: str.format() would collapse the doubled braces
        # the shortcode needs.
        return ']({{< ref "' + match.group(1) + '" >}}'

    return _FILENAME_LINK.sub(to_ref, parsed_article)


def _build_template():
    """Compile ``frontmatter_template`` into a Jinja2 template."""
    # Imported here rather than at the top of the file so that the parsing
    # helpers can be imported without Jinja2 being installed.
    import jinja2

    environment = jinja2.Environment(loader=jinja2.BaseLoader())
    return environment.from_string(frontmatter_template)


def _convert_post(article_path, template):
    """Convert the Pelican post at *article_path* into a Hugo page bundle."""
    with open(article_path, encoding="utf-8") as source:
        article = source.read().splitlines()

    frontmatter, parsed_article = parse_front_matter(article)

    if "slug" in frontmatter:
        bundle_name = frontmatter["slug"]
    else:
        # Only the bare file name: joining an absolute path onto
        # hugo_content_dir would discard hugo_content_dir altogether.
        bundle_name = os.path.splitext(os.path.basename(article_path))[0]
    page_bundle = os.path.join(hugo_content_dir, bundle_name)
    os.makedirs(page_bundle, exist_ok=True)

    # Rewriting links to other posts is switched off; to enable it:
    # parsed_article = resolve_file_links(parsed_article, article)

    # copy all images to the page bundle
    first_image = False
    for line in article:
        for image in _IMAGE_LINK.findall(line):
            image_name = os.path.basename(image)
            if not image_name:
                continue
            image_source_path = os.path.join(base_content_dir, image.lstrip("/"))
            image_dest_path = os.path.join(page_bundle, image_name)
            if not os.path.exists(image_dest_path):
                try:
                    shutil.copyfile(image_source_path, image_dest_path)
                except OSError as e:
                    # Best effort: a missing image must not stop the run.
                    print("failed to copy file", e, file=sys.stderr)
            # replace the old image paths with new relative ones
            parsed_article = parsed_article.replace(image, image_name)
            if not first_image:
                frontmatter["featured_image"] = image_name
                first_image = True

    # copy article content to page bundle
    if "lang" in frontmatter:
        # handle translations
        index_name = "index.{}.md".format(frontmatter["lang"])
    else:
        index_name = "index.md"
    with open(os.path.join(page_bundle, index_name), "w", encoding="utf-8") as f:
        f.write(template.render(frontmatter=frontmatter) + parsed_article)


def main():
    """Convert every ``.md`` post below ``post_dir`` into a Hugo page bundle."""
    os.makedirs(hugo_content_dir, exist_ok=True)
    template = _build_template()
    for root, _dirs, files in os.walk(post_dir):
        for name in files:
            if name.endswith(".md"):
                _convert_post(os.path.join(root, name), template)


if __name__ == "__main__":
    main()