pelican2hugo-conversion/convert_to_hugo.py


								#ltm-pelican-to-hugo-content-converter

								# © 2022 Roel Roscam Abbing, released as GPLv3


								# converts a Pelican post directory structure to Hugo Page Bundles

								# by taking Pelican post slug, creating Hugo page bundle (a directory)

								# taking Pelican post, creating slug/index.lang.md based on it

								# taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md

								# finding all media associated with the Pelican post and adding it to the pagebundle.

								# updating the links in the index.lang.md to be relative to the files

								# updating the references to other pelican posts to other hugo posts

								# adding all translated versions of a Pelican post as slug/index.lang.md


								# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files.


								import os
								import re
								import shutil
								import sys

								import jinja2


								# The content dir of the Pelican repo; image links such as "/images/x.png"
								# are looked up relative to this directory.
								base_content_dir = "/home/user/pelican-site/content"

								# The posts dir of the Pelican repo; it is walked recursively for *.md posts.
								post_dir = "/home/user/pelican-site/content/posts/"

								# The posts dir of the Hugo repo; one page bundle directory per post is
								# created below it.
								hugo_content_dir = "/home/user/new_hugo_site/content/posts/"

								# Create the output directory together with any missing parents.
								# exist_ok=True makes repeated runs a no-op instead of an error.
								os.makedirs(hugo_content_dir, exist_ok=True)


								# You need to adapt this for your own use case.
								#
								# Jinja2 source for the Hugo (YAML) front matter that is written at the top
								# of every converted post. It is assembled from one string per output line
								# so that the indentation of this source file can never leak into the
								# rendered front matter: the "---" delimiters and every key must start at
								# column 0, and YAML does not allow tabs as indentation.
								#
								# The optional lines (featured_image, translators) render as empty lines
								# when the value is missing, which YAML ignores.
								# NOTE: `draft` is hard-coded to False; the post's Status field is not
								# consulted by this template.
								frontmatter_template = "\n".join([
									'---',
									'title: "{{ frontmatter.title }}"',
									'date: "{{ frontmatter.date }}"',
									'summary: "{{ frontmatter.summary }}"',
									'slug: "{{ frontmatter.slug }}"',
									'lang: "{{ frontmatter.lang }}"',
									'authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]',
									'categories: ["{{ frontmatter.category }}"]',
									'tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]',
									'{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}',
									'{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}',
									'draft: False',
									'---',
									'',
								])


								# Compile the front matter template once from the in-memory string above.
								# The conversion loop renders it once per post, passing the parsed metadata
								# dict as `frontmatter`.
								template = jinja2.Environment(loader=jinja2.BaseLoader()).from_string(frontmatter_template)


								def parse_front_matter(article):
									"""Split a Pelican Markdown post into its metadata and its body.

									The metadata header is the run of ``Key: value`` lines at the very top
									of the post, for example::

										Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
										Date: 2010-03-25
										Author: Kris De Decker
										Category: Obsolete Technology
										Tags: human power
										Slug: history-of-human-powered-cranes
										Lang: en
										Summary: The only advantage that fossil-fuelled powered cranes ...
										Status: published

									The header ends at the first line that is not of the form ``key:``
									(normally the blank line that separates it from the body), so lines in
									the body that merely start with ``Title:`` etc. are left alone.

									Args:
										article: the post as a list of lines (without line endings).

									Returns:
										A ``(frontmatter, body)`` tuple. ``frontmatter`` is a dict holding
										only the recognised fields that have a non-empty value; ``tags``,
										``author`` and ``translator`` are lists of stripped strings, and
										``title`` and ``summary`` have double quotes backslash-escaped
										because the template renders them inside double quotes. ``body`` is
										every line after the header, joined with newlines.
									"""
									known_fields = (
										'title', 'date', 'author', 'category', 'tags',
										'slug', 'lang', 'summary', 'status', 'translator',
									)
									list_fields = ('tags', 'author', 'translator')
									quoted_fields = ('title', 'summary')

									raw_values = {}
									header_length = 0
									for line in article:
										# Split on the FIRST colon only, so values that contain ": "
										# themselves (titles, summaries) are kept whole.
										key, separator, value = line.partition(':')
										# Tolerate a UTF-8 byte order mark in front of the first key.
										key = key.lstrip('\ufeff')
										looks_like_metadata = bool(separator) and bool(key) and all(
											ch.isalnum() or ch in '-_' for ch in key
										)
										if not looks_like_metadata:
											break
										header_length += 1
										key = key.lower()
										value = value.strip()
										# Unknown keys and empty values are consumed but not stored.
										if key in known_fields and value:
											raw_values[key] = value

									frontmatter = {}
									for key in known_fields:
										if key not in raw_values:
											continue
										value = raw_values[key]
										if key in list_fields:
											value = [item.strip() for item in value.split(',') if item.strip()]
											if not value:
												continue
										elif key in quoted_fields:
											value = value.replace('"', r'\"')
										frontmatter[key] = value

									# Slice by the number of header lines actually consumed, not by the
									# number of fields that happened to be filled in.
									return frontmatter, '\n'.join(article[header_length:])


								def resolve_file_links(parsed_article,article):
									"""Rewrite Pelican ``{filename}`` links as Hugo ``ref`` shortcodes.

									Every Markdown link of the form ``[text]({filename}/path.md)`` becomes
									``[text]({{< ref "/path.md" >}})``. An optional link title is left in
									place, e.g.::

										[About]({{< ref "/page/about" >}} "About Us")

									All such links are converted, including several on the same line.

									Args:
										parsed_article: the post body as a single string.
										article: the post as a list of lines. Kept so existing callers
											keep working; the rewrite is done on ``parsed_article``
											directly and this argument is not read.

									Returns:
										``parsed_article`` with the links rewritten.
									"""
									# group 1: link text, group 2: path after the "{filename}" marker.
									# The path stops at whitespace or ")" so a trailing link title and
									# the closing parenthesis stay untouched.
									link_pattern = re.compile(r'\[([^\]]*)\]\(\{filename\}([^)\s]+)')
									return link_pattern.sub(r'[\1]({{< ref "\2" >}}', parsed_article)


								# Convert every Pelican Markdown post below post_dir into a Hugo page
								# bundle: <hugo_content_dir>/<slug>/index[.<lang>].md plus its images.

								# Captures the site-absolute image path of a Markdown link or image,
								# e.g. "/images/foo.png" in "![alt](/images/foo.png "title")".
								image_link_pattern = re.compile(r'\]\((/images/[^)\s]+)')

								for root, _dirs, files in os.walk(post_dir):
									for filename in files:
										if not filename.endswith('.md'):
											continue

										article_path = os.path.join(root, filename)
										with open(article_path, encoding='utf-8') as source:
											article = source.read().splitlines()
										frontmatter, parsed_article = parse_front_matter(article)

										# The bundle is named after the slug; without a slug fall back to
										# the bare file name (not the full source path, which would place
										# the bundle inside the Pelican tree).
										bundle_name = frontmatter.get('slug') or os.path.splitext(filename)[0]
										page_bundle = os.path.join(hugo_content_dir, bundle_name)
										os.makedirs(page_bundle, exist_ok=True)

										# Link resolution is currently disabled:
										#parsed_article  = resolve_file_links(parsed_article, article)

										# Copy all images referenced by the post into the page bundle.
										# dict.fromkeys de-duplicates while keeping first-seen order.
										images = list(dict.fromkeys(image_link_pattern.findall(parsed_article)))
										for image in images:
											image_source_path = os.path.join(base_content_dir, image.lstrip('/'))
											image_dest_path = os.path.join(page_bundle, os.path.basename(image))
											if not os.path.exists(image_dest_path):
												try:
													shutil.copyfile(image_source_path, image_dest_path)
												except OSError as e:
													# Best effort: a missing image must not abort the run.
													print("failed to copy file", e)

										# Replace the old image paths with ones relative to the bundle.
										parsed_article = image_link_pattern.sub(
											lambda match: '](' + os.path.basename(match.group(1)),
											parsed_article,
										)

										# The first image of the post doubles as its featured image.
										if images:
											frontmatter['featured_image'] = os.path.basename(images[0])

										# Write the post into the bundle; translations get their language
										# code in the file name (index.<lang>.md).
										lang = frontmatter.get('lang')
										index_name = 'index.{}.md'.format(lang) if lang else 'index.md'
										fp = os.path.join(page_bundle, index_name)
										with open(fp, 'w', encoding='utf-8') as f:
											headers = template.render(frontmatter=frontmatter)
											f.write(headers + parsed_article)