initial commit

7 months ago · 8628cc66c5
2 changed files with 191 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,5 @@
+A script to facilitate the migration from an existing site in Pelican, to one in Hugo.
+
+It has much of the basic logic, but you probably need to adapt the specific metadata/frontmatter tags to your own situation.
+
+Initially written for <https://test.roelof.info> and <https://solar.lowtechmagazine.com>
--- a/convert_to_hugo.py
+++ b/convert_to_hugo.py
@ -0,0 +1,186 @@
+#ltm-pelican-to-hugo-content-converter
+# © 2022 Roel Roscam Abbing, released as GPLv3
+
+# converts a Pelican post directory structure to Hugo Page Bundles
+# by taking Pelican post slug, creating Hugo page bundle (a directory)
+# taking Pelican post, creating slug/index.lang.md based on it
+# taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md
+# finding all media associated with the Pelican post and adding it to the pagebundle.
+# updating the links in the index.lang.md to be relative to the files
+# updating the references to other pelican posts to other hugo posts
+# adding all translated versions of a Pelican post as slug/index.lang.md
+
+# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files. 
+
+
+import sys
+import os
+import shutil
+import jinja2
+
+#the content dir of the pelican repo
+base_content_dir = "/home/user/pelican-site/content"
+
+#the posts dir of the pelican repo
+post_dir = "/home/user/pelican-site/content/posts/"
+
+#the posts dir of the hugo repo 
+hugo_content_dir = "/home/user/new_hugo_site/content/posts/"
+
+if not os.path.exists(hugo_content_dir):
+	os.mkdir(hugo_content_dir)
+
+
+# You need to adapt this for your own use case:
+#
+# Jinja2 template for the front matter block ("---" delimited) that is written
+# at the top of every generated index file. It is rendered with a single
+# variable, `frontmatter`, a dict of metadata for one article.
+#  - authors, tags and translators are iterated over, so they are expected to
+#    be lists; each item is emitted as a double-quoted, comma-separated entry.
+#  - featured_image and translators are only emitted when the value is set;
+#    every other line is always emitted, even if its value is missing.
+#  - draft is hard-coded to False for every article.
+# Values are inserted between double quotes without any escaping by the
+# template itself, so quotes inside a value must be escaped before rendering.
+frontmatter_template = """---
+title: "{{ frontmatter.title }}"
+date: "{{ frontmatter.date }}"
+summary: "{{ frontmatter.summary }}"
+slug: "{{ frontmatter.slug }}"
+lang: "{{ frontmatter.lang }}"
+authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
+categories: ["{{ frontmatter.category }}"]
+tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
+{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
+{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
+draft: False
+---
+"""
+
+# Compile the template once from the string above; BaseLoader is used because
+# no template files are loaded from disk.
+template = jinja2.Environment(loader=jinja2.BaseLoader()).from_string(frontmatter_template)
+
+
+def parse_front_matter(article):
+	#Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
+	#Date: 2010-03-25
+	#Author: Kris De Decker
+	#Category: Obsolete Technology
+	#Tags: human power
+	#Slug: history-of-human-powered-cranes
+	#Lang: en
+	#Summary: The only advantage that fossil-fuelled powered cranes have brought us, is a higher lifting speed
+	#Status: published
+
+	parsed_article = article
+
+	frontmatter = {
+	'title':'',
+	'date':'',
+	'author':'',
+	'category':'',
+	'tags':'',
+	'slug':'',
+	'lang':'',
+	'summary':'',
+	'status':'',
+	'translator':'',
+	'featured_image':''
+	}
+
+	metadatafields = {
+	'Title: ':'title',
+	'Date: ':'date',
+	'Author: ':'author',
+	'Category: ':'category',
+	'Tags: ':'tags',
+	'Slug: ':'slug',
+	'Lang: ':'lang',
+	'Summary: ':'summary',
+	'Status: ':'status',
+	'Translator: ':'translator'}
+
+	for l in article:
+		if l.startswith(("Title:", "Date:", "Author:", "Category:", "Tags:", "Slug:", "Lang:", "Summary:", "Status:", "Translator:")):
+			field = l.split(": ")[0]
+			content = l.split(": ")[1]
+			frontmatter[field.lower()] = content.strip()
+
+	#remove frontmatter items that are empty
+	frontmatter2 = frontmatter.copy()
+	for v in frontmatter2.keys():
+		if not frontmatter[v]:
+			frontmatter.pop(v)
+
+	if 'tags' in frontmatter.keys():
+		frontmatter['tags'] = frontmatter['tags'].split(',')
+
+	if 'summary' in frontmatter.keys():
+		summary = frontmatter['summary']
+		summary = summary.replace('"', r'\"')
+		frontmatter['summary'] = summary
+
+	if 'author' in frontmatter.keys():
+		frontmatter['author'] = frontmatter['author'].split(',')
+
+	if 'translator' in frontmatter.keys():
+		frontmatter['translator'] = frontmatter['translator'].split(',')
+
+	parsed_article = parsed_article[len(frontmatter.keys()):]
+
+
+	return frontmatter, '\n'.join(parsed_article)
+
+def resolve_file_links(parsed_article,article):
+	#[About]({{< ref "/page/about" >}} "About Us")
+	# this is VERY slow but seems to work well enough?
+	for line in article:
+		if r"({filename}" in line:
+			fn = line[line.find('({')+1:line.find(')')]
+			desc = line[line.find('[')+1:line.find(']')]
+			fn = fn.strip("{filename}")
+			link = line[line.find('[')+1:line.find(')')]
+			ref ="{}]({{< ref '{}' >}}".format(desc, fn)
+			parsed_article = parsed_article.replace(link, ref)
+	return parsed_article
+
+
+for root, dirs, files in os.walk(post_dir):
+
+	for i in files:
+		i = os.path.join(root, i)
+		if i.endswith('.md'):
+			fn, ext = os.path.splitext(i)
+			article_path = os.path.join(post_dir, i)
+			article = open(article_path).read().splitlines()
+			new_article = open(article_path).read()
+			frontmatter, parsed_article = parse_front_matter(article)
+			if 'slug' in frontmatter.keys():
+				page_bundle= os.path.join(hugo_content_dir, frontmatter['slug'])
+			else:
+				page_bundle= os.path.join(hugo_content_dir, fn)
+			if not os.path.exists(page_bundle):
+				os.mkdir(page_bundle)
+
+			#copy article content to page bundle
+			#copy all images to pagebundle
+			first_image = False
+			#parsed_article  = resolve_file_links(parsed_article, article)
+			for line in article:
+				if "](/images/" in line:
+					image = line[line.find('(')+1:line.find(')')]
+					image_source_path = os.path.join(base_content_dir, image[1:])
+					image_dest_path = os.path.join(page_bundle, os.path.basename(image))
+					if not os.path.exists(image_dest_path):
+						try:
+							shutil.copyfile(image_source_path, image_dest_path)
+						except Exception as e:
+							print("failed to copy file", e)
+					#replace the old image paths with new relative ones
+					parsed_article = parsed_article.replace(image, os.path.basename(image))
+					if not first_image:
+						frontmatter['featured_image'] = os.path.basename(image)
+						first_image = True
+			
+			#copy article content to page bundle
+			if 'lang' in frontmatter.keys(): # handle translations
+				fp = os.path.join(page_bundle, '{}.{}.{}'.format('index', frontmatter['lang'],'md'))
+			else:
+				fp = os.path.join(page_bundle, '{}'.format('index.md'))
+			with open(fp, 'w') as f:
+				headers = template.render(frontmatter=frontmatter)
+
+				f.write(headers + parsed_article)
+			#print(parsed_article[:15])
+
+
+