# ltm-pelican-to-hugo-content-converter
# © 2022 Roel Roscam Abbing, released as GPLv3
#
# Provenance: convert_to_hugo.py as added by commit
# 8628cc66c560e9a8951f36b7ebfe71c89f3e145b ("initial commit",
# author rra, Wed May 8 09:51:18 2024 +0200).
"""Convert a Pelican post directory structure to Hugo page bundles.

For every Pelican post this script:

* takes the Pelican post slug and creates a Hugo page bundle (a directory);
* takes the Pelican post and creates ``slug/index.lang.md`` based on it;
* takes the Pelican post metadata and creates Hugo front matter for
  ``slug/index.lang.md``;
* finds all media associated with the Pelican post and adds it to the
  page bundle;
* updates the links in ``index.lang.md`` to be relative to those files;
* adds all translated versions of a Pelican post as ``slug/index.lang.md``.

Updating the references from one Pelican post to another is implemented
in ``resolve_file_links`` but is not enabled in the conversion run.

N.B. this tool will do 95% of the work but you will need to manually fix
a few individual files.

From the project README: a script to facilitate the migration from an
existing site in Pelican to one in Hugo. It has much of the basic logic,
but you probably need to adapt the specific metadata/frontmatter tags to
your own situation. Initially written for and [the rest of this sentence
is missing from the source].
"""

import functools
import os
import re
import shutil
import sys

# The content dir of the Pelican repo.
base_content_dir = "/home/user/pelican-site/content"

# The posts dir of the Pelican repo.
post_dir = "/home/user/pelican-site/content/posts/"

# The posts dir of the Hugo repo.
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"


# You need to adapt this for your own use case:
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""

# Metadata fields that are carried over into the Hugo front matter.
_KNOWN_FIELDS = (
    "title",
    "date",
    "author",
    "category",
    "tags",
    "slug",
    "lang",
    "summary",
    "status",
    "translator",
)

# Fields holding a comma-separated list of values.
_LIST_FIELDS = ("tags", "author", "translator")

# Fields the template puts between double quotes and that commonly contain
# free text, so quotes and backslashes in them must be escaped.
_QUOTED_TEXT_FIELDS = ("title", "summary")

# A "Key: value" metadata line at the top of a post.
_META_LINE = re.compile(r"^ {0,3}([A-Za-z0-9_-]+):\s*(.*)$")

# "[description]({filename}target" -- a Pelican link to another source file.
_FILENAME_LINK = re.compile(r"\[([^\]]*)\]\(\{filename\}([^)\s]*)")

# "](/images/..." -- the target of a link to a site-absolute image.
_IMAGE_LINK = re.compile(r"\]\((/images/[^)\s]+)")


def _escape_double_quoted(value):
    """Escape *value* for use inside a double-quoted YAML scalar."""
    return value.replace("\\", "\\\\").replace('"', '\\"')


def parse_front_matter(article):
    """Split a Pelican post into its metadata and its body.

    The metadata header is the run of ``Key: value`` lines at the very top
    of the post, for example::

        Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
        Date: 2010-03-25
        Author: Kris De Decker
        Category: Obsolete Technology
        Tags: human power
        Slug: history-of-human-powered-cranes
        Lang: en
        Summary: The only advantage that fossil-fuelled powered cranes ...
        Status: published

    Args:
        article: the post as a list of lines (without line endings).

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` maps the lower-cased
        names of the known fields that have a value to that value: ``tags``,
        ``author`` and ``translator`` are lists of stripped strings, ``title``
        and ``summary`` are escaped for a double-quoted YAML scalar, all
        others are plain strings. ``body`` is every line after the header
        joined with newlines; the blank line that separates header and body
        is kept, because the rendered front matter does not end in a newline.
    """
    frontmatter = {}
    header_length = 0
    for line in article:
        match = _META_LINE.match(line)
        if match is None:
            # The header ends at the first line that is not "Key: value",
            # so such lines further down the post are left in the body.
            break
        header_length += 1
        field = match.group(1).lower()
        if field in _KNOWN_FIELDS:
            # Everything after the first colon is the value, so values that
            # themselves contain ": " are kept whole.
            frontmatter[field] = match.group(2).strip()

    # Remove front matter items that are empty.
    frontmatter = {k: v for k, v in frontmatter.items() if v}

    for field in _QUOTED_TEXT_FIELDS:
        if field in frontmatter:
            frontmatter[field] = _escape_double_quoted(frontmatter[field])

    for field in _LIST_FIELDS:
        if field in frontmatter:
            items = (item.strip() for item in frontmatter[field].split(","))
            frontmatter[field] = [item for item in items if item]

    # Slice by the number of header lines actually consumed; unknown or
    # empty metadata lines are dropped rather than leaking into the body.
    return frontmatter, "\n".join(article[header_length:])


def _filename_link_to_ref(match):
    """Build the Hugo replacement for one matched ``{filename}`` link."""
    # The quadrupled braces are str.format escapes for a literal "{{" / "}}".
    return '[{}]({{{{< ref "{}" >}}}}'.format(match.group(1), match.group(2))


def resolve_file_links(parsed_article, article):
    """Rewrite Pelican ``{filename}`` links into Hugo ``ref`` shortcodes.

    ``[About]({filename}/page/about.md "About Us")`` becomes
    ``[About]({{< ref "/page/about.md" >}} "About Us")``. Every such link is
    rewritten, including several on one line; a link title is left in place.

    Args:
        parsed_article: the post body as one string.
        article: the post as a list of lines. No longer needed, since the
            links are located in ``parsed_article`` directly; kept so that
            existing calls keep working.

    Returns:
        ``parsed_article`` with the links rewritten.
    """
    return _FILENAME_LINK.sub(_filename_link_to_ref, parsed_article)


def _collect_images(lines):
    """Return the ``/images/...`` link targets in *lines*, in order, once each."""
    images = []
    for line in lines:
        for target in _IMAGE_LINK.findall(line):
            if target not in images:
                images.append(target)
    return images


@functools.lru_cache(maxsize=1)
def _get_template():
    """Compile ``frontmatter_template`` on first use and reuse it afterwards."""
    # Imported here because Jinja2 is only needed to render output; the
    # parsing helpers above stay importable without it.
    import jinja2

    environment = jinja2.Environment(loader=jinja2.BaseLoader())
    return environment.from_string(frontmatter_template)


def convert_post(article_path):
    """Convert one Pelican post into a file inside a Hugo page bundle.

    The bundle is named after the post's slug, or after the source file name
    (without extension) when the post has no slug. Images linked as
    ``/images/...`` are copied into the bundle and their links made relative;
    the first one becomes ``featured_image``.

    Args:
        article_path: path of the Pelican ``.md`` file.

    Returns:
        The path of the ``index.md`` / ``index.<lang>.md`` that was written.
    """
    # "utf-8-sig" also accepts files that start with a byte order mark, which
    # would otherwise hide the first metadata line.
    with open(article_path, encoding="utf-8-sig") as source:
        article = source.read().splitlines()
    frontmatter, parsed_article = parse_front_matter(article)

    bundle_name = frontmatter.get("slug")
    if not bundle_name:
        bundle_name = os.path.splitext(os.path.basename(article_path))[0]
    page_bundle = os.path.join(hugo_content_dir, bundle_name)
    os.makedirs(page_bundle, exist_ok=True)

    # NOTE: rewriting links between posts is left disabled, as it was in the
    # original script. To enable it:
    # parsed_article = resolve_file_links(parsed_article, article)

    # Copy all images to the page bundle.
    images = _collect_images(article)
    for image in images:
        image_name = os.path.basename(image)
        # Drop the leading "/" so the path is joined below the content dir.
        image_source_path = os.path.join(base_content_dir, image[1:])
        image_dest_path = os.path.join(page_bundle, image_name)
        if not os.path.exists(image_dest_path):
            try:
                shutil.copyfile(image_source_path, image_dest_path)
            except OSError as e:
                # Best effort: a missing image should not stop the run.
                print("failed to copy file", e)
        # Replace the old image paths with new relative ones.
        parsed_article = parsed_article.replace(image, image_name)
    if images:
        frontmatter["featured_image"] = os.path.basename(images[0])

    # Copy article content to the page bundle.
    if "lang" in frontmatter:  # handle translations
        index_name = "{}.{}.{}".format("index", frontmatter["lang"], "md")
    else:
        index_name = "index.md"
    fp = os.path.join(page_bundle, index_name)
    headers = _get_template().render(frontmatter=frontmatter)
    with open(fp, "w", encoding="utf-8") as f:
        f.write(headers + parsed_article)
    return fp


def main():
    """Convert every ``.md`` file below ``post_dir``."""
    os.makedirs(hugo_content_dir, exist_ok=True)
    for root, _dirs, files in os.walk(post_dir):
        for name in files:
            if name.endswith(".md"):
                convert_post(os.path.join(root, name))


if __name__ == "__main__":
    sys.exit(main())