initial commit

7 months ago · 8628cc66c5
2 changed files with 191 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,5 @@
 A script to facilitate migrating an existing Pelican site to Hugo.
 It has much of the basic logic, but you probably need to adapt the specific metadata/frontmatter tags to your own situation.
 Initially written for <https://test.roelof.info> and <https://solar.lowtechmagazine.com>
--- a/convert_to_hugo.py
+++ b/convert_to_hugo.py
@ -0,0 +1,186 @@
 #ltm-pelican-to-hugo-content-converter
 # © 2022 Roel Roscam Abbing, released as GPLv3
 # converts a Pelican post directory structure to Hugo Page Bundles
 # by taking Pelican post slug, creating Hugo page bundle (a directory)
 # taking Pelican post, creating slug/index.lang.md based on it
 # taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md
 # finding all media associated with the Pelican post and adding it to the pagebundle.
 # updating the links in the index.lang.md to be relative to the files
 # updating the references to other pelican posts to other hugo posts
 # adding all translated versions of a Pelican post as slug/index.lang.md
 # N.B. this tool will do 95% of the work but you will need to manually fix a few individual files. 
 import os
 import re
 import shutil
 import sys

 import jinja2
 # Root content directory of the Pelican repo. Image links in the posts
 # (e.g. "/images/foo.png") are resolved relative to this directory.
 base_content_dir = "/home/user/pelican-site/content"
 # Directory of the Pelican repo that holds the posts to convert.
 post_dir = "/home/user/pelican-site/content/posts/"
 # Posts directory of the Hugo repo; one page bundle per post is created in it.
 hugo_content_dir = "/home/user/new_hugo_site/content/posts/"
 # makedirs also creates missing parent directories (a fresh Hugo site may not
 # have "content/" yet) and exist_ok avoids a separate, racy existence check.
 os.makedirs(hugo_content_dir, exist_ok=True)
 # Jinja2 template for the Hugo front matter block that is written at the top of
 # every converted post. It is rendered with a single variable, `frontmatter`.
 # `author`, `tags` and `translator` are iterated, so they are expected to be
 # lists; `featured_image` and `translator` are only emitted when present.
 # You need to adapt this for your own use case:
 frontmatter_template = """---
 title: "{{ frontmatter.title }}"
 date: "{{ frontmatter.date }}"
 summary: "{{ frontmatter.summary }}"
 slug: "{{ frontmatter.slug }}"
 lang: "{{ frontmatter.lang }}"
 authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
 categories: ["{{ frontmatter.category }}"]
 tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
 {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
 {% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
 draft: False
 ---
 """
 # Compile the template once at import time. The template text comes from the
 # string above, so the environment is built with the plain BaseLoader.
 template = jinja2.Environment(loader=jinja2.BaseLoader()).from_string(frontmatter_template)
 def parse_front_matter(article):
 	"""Split a Pelican Markdown post into its metadata and its body.

 	The metadata block is the run of ``Key: value`` lines at the very top of
 	the post, for example::

 		Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
 		Date: 2010-03-25
 		Author: Kris De Decker
 		Tags: human power
 		Slug: history-of-human-powered-cranes
 		Lang: en

 	The block ends at the first line that is not of that form (normally the
 	blank line that separates it from the text). Lines further down that
 	merely look like metadata are left alone as part of the body.

 	Args:
 		article: the post as a list of lines without line endings.

 	Returns:
 		A ``(frontmatter, body)`` tuple. ``frontmatter`` maps the lower-cased
 		names of the recognised, non-empty fields to their values: ``tags``,
 		``author`` and ``translator`` are lists of stripped strings, the
 		others are strings (``title`` and ``summary`` with double quotes
 		escaped for the double-quoted template). ``body`` is the rest of the
 		post joined with newlines, metadata block removed.
 	"""
 	known_fields = (
 		'title', 'date', 'author', 'category', 'tags',
 		'slug', 'lang', 'summary', 'status', 'translator',
 	)
 	frontmatter = {}
 	header_length = 0
 	for line in article:
 		# Split on the FIRST colon only, so values that contain ": "
 		# themselves (as the example title does) are kept whole.
 		key, separator, value = line.partition(':')
 		if not separator or not key.replace('-', '').replace('_', '').isalnum():
 			break
 		# Count every header line, including unknown or empty fields, so the
 		# body starts after the whole block rather than after N known keys.
 		header_length += 1
 		field = key.lower()
 		value = value.strip()
 		if field in known_fields and value:
 			frontmatter[field] = value
 	# These values are rendered inside double quotes by the template.
 	for field in ('title', 'summary'):
 		if field in frontmatter:
 			frontmatter[field] = frontmatter[field].replace('"', r'\"')
 	# Comma separated fields become lists without stray whitespace or blanks.
 	for field in ('tags', 'author', 'translator'):
 		if field in frontmatter:
 			items = [item.strip() for item in frontmatter[field].split(',')]
 			frontmatter[field] = [item for item in items if item]
 	return frontmatter, '\n'.join(article[header_length:])
 def resolve_file_links(parsed_article,article):
 	"""Rewrite Pelican ``{filename}`` links into Hugo ``ref`` shortcodes.

 	``[About]({filename}/pages/about.md "About Us")`` becomes
 	``[About]({{< ref "/pages/about.md" >}} "About Us")``. Every such link is
 	converted, including several on one line; an optional link title is kept.

 	Args:
 		parsed_article: the article body as a single string.
 		article: the article as a list of lines. Kept for backward
 			compatibility; the links are now matched in ``parsed_article``
 			directly, so this argument is not used.

 	Returns:
 		``parsed_article`` with the links rewritten.
 	"""
 	def to_ref(match):
 		text, target, rest = match.groups()
 		# Plain concatenation on purpose: with str.format the doubled braces
 		# of the shortcode would collapse into single ones.
 		return '[' + text + ']({{< ref "' + target + '" >}}' + rest + ')'
 	# group 1: link text, group 2: path after the {filename} prefix,
 	# group 3: whatever follows the path inside the parentheses (a title).
 	link_pattern = r'\[([^\]\n]*)\]\(\{filename\}([^)\s]*)([^)\n]*)\)'
 	return re.sub(link_pattern, to_ref, parsed_article)
 # Convert every Markdown post below post_dir into a Hugo page bundle:
 # <hugo_content_dir>/<slug>/index[.<lang>].md plus the images the post links to.
 for root, _dirs, files in os.walk(post_dir):
 	for name in files:
 		if not name.endswith('.md'):
 			continue
 		article_path = os.path.join(root, name)
 		with open(article_path, encoding='utf-8') as f:
 			article = f.read().splitlines()
 		frontmatter, parsed_article = parse_front_matter(article)
 		# Posts without a slug fall back to the bare file name. The full source
 		# path must not be used here: joining an absolute path discards
 		# hugo_content_dir and would create the bundle inside the Pelican tree.
 		bundle_name = frontmatter.get('slug') or os.path.splitext(name)[0]
 		page_bundle = os.path.join(hugo_content_dir, bundle_name)
 		os.makedirs(page_bundle, exist_ok=True)
 		# Rewriting of {filename} links is currently disabled:
 		#parsed_article  = resolve_file_links(parsed_article, article)
 		# Copy all linked images into the page bundle.
 		first_image = False
 		for line in article:
 			# A line may hold several images, link titles or other text in
 			# parentheses, so collect every "](/images/...)" target rather than
 			# whatever sits between the first "(" and ")".
 			for image in re.findall(r'\]\((/images/[^)\s]*)', line):
 				image_name = os.path.basename(image)
 				if not image_name:
 					continue
 				# Drop the leading "/" so the path is joined below base_content_dir.
 				image_source_path = os.path.join(base_content_dir, image[1:])
 				image_dest_path = os.path.join(page_bundle, image_name)
 				if not os.path.exists(image_dest_path):
 					try:
 						shutil.copyfile(image_source_path, image_dest_path)
 					except OSError as e:
 						# Best effort: report the missing image and carry on.
 						print("failed to copy file", e)
 				# Replace the old image path with one relative to the bundle.
 				parsed_article = parsed_article.replace(image, image_name)
 				# The first image of a post becomes its featured image.
 				if not first_image:
 					frontmatter['featured_image'] = image_name
 					first_image = True
 		# Translations of one post share a bundle as index.<lang>.md files.
 		if 'lang' in frontmatter:
 			fp = os.path.join(page_bundle, 'index.{}.md'.format(frontmatter['lang']))
 		else:
 			fp = os.path.join(page_bundle, 'index.md')
 		with open(fp, 'w', encoding='utf-8') as f:
 			f.write(template.render(frontmatter=frontmatter) + parsed_article)