#ltm-pelican-to-hugo-content-converter
# © 2022 Roel Roscam Abbing, released as GPLv3

# Converts a Pelican post directory structure to Hugo Page Bundles by:
# - taking the Pelican post slug and creating a Hugo page bundle (a directory) named after it
# - taking the Pelican post and creating slug/index.lang.md based on it
# - taking the Pelican post metadata and creating Hugo front matter for slug/index.lang.md
# - finding all media associated with the Pelican post and adding it to the page bundle
# - updating the links in index.lang.md to be relative to the copied files
# - updating the references to other Pelican posts so they point to the corresponding Hugo posts
# - adding all translated versions of a Pelican post as slug/index.lang.md

# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files. 


import os
import re
import shutil
import sys

import jinja2

# Root content directory of the Pelican repo. Image links of the form
# /images/... found in posts are resolved relative to this directory.
base_content_dir = "/home/user/pelican-site/content"

# Directory of the Pelican repo that holds the posts to convert.
post_dir = "/home/user/pelican-site/content/posts/"

# Posts directory of the Hugo repo; one page bundle per post is created here.
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"

# makedirs (unlike mkdir) also creates missing parent directories such as
# a not-yet-existing content/ folder, and exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(hugo_content_dir, exist_ok=True)


# Jinja2 source for the Hugo (YAML) front matter written at the top of every
# converted post. It is rendered with a single variable, `frontmatter`, the
# dict produced for the post being converted.
# Adapt the fields below to your own site before running the conversion.
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""

# The template lives in the string above rather than on disk, so the
# environment only needs the do-nothing BaseLoader.
_frontmatter_env = jinja2.Environment(loader=jinja2.BaseLoader())

# Compiled once at import time and reused for every post.
template = _frontmatter_env.from_string(frontmatter_template)


def parse_front_matter(article):
	"""Split a Pelican Markdown post into front matter data and body text.

	The metadata block is the run of ``Key: value`` lines at the very top of
	the post, for example::

		Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
		Date: 2010-03-25
		Author: Kris De Decker
		Category: Obsolete Technology
		Tags: human power
		Slug: history-of-human-powered-cranes
		Lang: en
		Summary: The only advantage that fossil-fuelled powered cranes ...
		Status: published

	Parsing stops at the first line that is not of that form (normally the
	blank line that separates metadata from content), so a body line that
	happens to start with e.g. ``Date:`` is never mistaken for metadata.

	Args:
		article: the post as a list of lines without line endings.

	Returns:
		A ``(frontmatter, body)`` tuple. ``frontmatter`` is a dict holding
		only the recognised fields that have a non-empty value; ``tags``,
		``author`` and ``translator`` are lists of stripped strings, and
		double quotes in ``title`` and ``summary`` are backslash-escaped.
		``body`` is every line after the metadata block joined with
		newlines. ``article`` itself is not modified.
	"""
	known_fields = (
		'title', 'date', 'author', 'category', 'tags',
		'slug', 'lang', 'summary', 'status', 'translator',
	)
	# comma separated in Pelican, rendered as lists in the Hugo front matter
	list_fields = ('tags', 'author', 'translator')
	# rendered inside double quotes, so embedded quotes must be escaped
	quoted_fields = ('title', 'summary')

	frontmatter = {}
	header_length = 0
	for line in article:
		# Split on the FIRST colon only: values such as titles may
		# themselves contain ": ".
		key, separator, value = line.partition(':')
		if not separator or not key.replace('-', '').replace('_', '').isalnum():
			break
		# Every metadata line counts towards the header, including unknown
		# fields and fields without a value, so the body starts at the
		# right line whatever the header contains.
		header_length += 1
		key = key.lower()
		value = value.strip()
		if key in known_fields and value:
			frontmatter[key] = value

	for field in list_fields:
		if field in frontmatter:
			items = (item.strip() for item in frontmatter[field].split(','))
			frontmatter[field] = [item for item in items if item]

	for field in quoted_fields:
		if field in frontmatter:
			frontmatter[field] = frontmatter[field].replace('"', r'\"')

	return frontmatter, '\n'.join(article[header_length:])

def resolve_file_links(parsed_article, article):
	"""Rewrite Pelican ``{filename}`` links as Hugo ``ref`` shortcodes.

	A link such as::

		[About]({filename}/page/about.md "About Us")

	becomes::

		[About]({{< ref "/page/about.md" >}} "About Us")

	Every such link in the text is rewritten, including several on one
	line; an optional link title is kept. Image links (``![alt](...)``) are
	left alone because ``ref`` resolves pages, not media files.

	Args:
		parsed_article: body of the post as a single string.
		article: the post as a list of lines. Kept for backward
			compatibility; the body string alone is sufficient.

	Returns:
		The body with all ``{filename}`` links rewritten.
	"""
	# group 1: link text, group 2: target path, group 3: optional title
	# (including its leading whitespace). (?<!!) skips image links.
	pelican_link = re.compile(r'(?<!!)\[([^\]]*)\]\(\{filename\}([^)\s]*)([^)]*)\)')

	def to_ref(match):
		text, target, title = match.groups()
		# Braces are doubled twice over: str.format turns "{{{{" into the
		# literal "{{" that opens a Hugo shortcode.
		return '[{}]({{{{< ref "{}" >}}}}{})'.format(text, target, title)

	# A single pass over the body replaces the former per-line scan plus
	# whole-text replace.
	return pelican_link.sub(to_ref, parsed_article)


# Captures the target of a Markdown link or image that points into Pelican's
# /images/ directory, stopping before the closing parenthesis or an optional
# link title.
image_link_pattern = re.compile(r'\]\((/images/[^)\s]+)')


def convert_post(article_path):
	"""Convert one Pelican Markdown post into a Hugo page bundle.

	The bundle directory is named after the post's slug, or after the bare
	file name when the post has no slug. Every image the post links to under
	/images/ is copied into the bundle and the link is rewritten to the bare
	file name; the first image becomes the post's ``featured_image``. The
	post is written as ``index.<lang>.md`` (``index.md`` when no language is
	set) so translations of the same slug end up in the same bundle.

	Images are stored under their base name only, so two different images
	sharing a base name map to the same file in the bundle.

	Args:
		article_path: path of the Pelican ``.md`` file to convert.
	"""
	# 'utf-8-sig' reads UTF-8 and additionally drops a leading byte order
	# mark, which would otherwise hide the first metadata line.
	with open(article_path, encoding='utf-8-sig') as f:
		raw_article = f.read()

	frontmatter, parsed_article = parse_front_matter(raw_article.splitlines())

	if 'slug' in frontmatter:
		bundle_name = frontmatter['slug']
	else:
		# Use only the file name: joining the full source path would place
		# the bundle inside the Pelican tree instead of the Hugo one.
		bundle_name = os.path.splitext(os.path.basename(article_path))[0]
	page_bundle = os.path.join(hugo_content_dir, bundle_name)
	os.makedirs(page_bundle, exist_ok=True)

	# NOTE: links to other posts ({filename}...) are not rewritten here;
	# resolve_file_links() exists for that but is not applied.

	# copy all images to the page bundle
	for match in image_link_pattern.finditer(raw_article):
		image = match.group(1)
		image_name = os.path.basename(image)
		# image starts with "/", which must be dropped or os.path.join
		# would discard base_content_dir
		image_source_path = os.path.join(base_content_dir, image[1:])
		image_dest_path = os.path.join(page_bundle, image_name)
		if not os.path.exists(image_dest_path):
			try:
				shutil.copyfile(image_source_path, image_dest_path)
			except OSError as e:
				# best effort: a missing image must not stop the conversion
				print("failed to copy file", e)
		# replace the old image path with the new bundle-relative one
		parsed_article = parsed_article.replace(image, image_name)
		# only the first image of the post is used as featured image
		frontmatter.setdefault('featured_image', image_name)

	# copy article content to the page bundle
	if 'lang' in frontmatter:  # handle translations
		index_name = 'index.{}.md'.format(frontmatter['lang'])
	else:
		index_name = 'index.md'
	with open(os.path.join(page_bundle, index_name), 'w', encoding='utf-8') as f:
		f.write(template.render(frontmatter=frontmatter) + parsed_article)


for root, dirs, files in os.walk(post_dir):
	for name in files:
		if name.endswith('.md'):
			convert_post(os.path.join(root, name))