initial commit

This commit is contained in:
rra 2024-05-08 09:51:18 +02:00
commit 8628cc66c5
2 changed files with 191 additions and 0 deletions

5
README.md Normal file
View File

@ -0,0 +1,5 @@
A script to facilitate migrating an existing Pelican site to Hugo.
It implements most of the basic logic, but you will probably need to adapt the specific metadata/front matter tags to your own situation.
Initially written for <https://test.roelof.info> and <https://solar.lowtechmagazine.com>

186
convert_to_hugo.py Normal file
View File

@ -0,0 +1,186 @@
#ltm-pelican-to-hugo-content-converter
# © 2022 Roel Roscam Abbing, released as GPLv3
# converts a Pelican post directory structure to Hugo Page Bundles
# by taking Pelican post slug, creating Hugo page bundle (a directory)
# taking Pelican post, creating slug/index.lang.md based on it
# taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md
# finding all media associated with the Pelican post and adding it to the pagebundle.
# updating the links in the index.lang.md to be relative to the files
# updating the references to other pelican posts to other hugo posts
# adding all translated versions of a Pelican post as slug/index.lang.md
# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files.
import os
import re
import shutil
import sys

import jinja2
# Root of the Pelican content tree. Image links of the form "/images/..."
# found in posts are resolved relative to this directory.
base_content_dir = "/home/user/pelican-site/content"
# Directory of the Pelican repo that holds the posts to convert.
post_dir = "/home/user/pelican-site/content/posts/"
# Directory of the Hugo repo that receives the generated page bundles.
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"
# makedirs (instead of mkdir) also creates missing parent directories such as
# "content/", and exist_ok avoids failing when the directory is already there.
os.makedirs(hugo_content_dir, exist_ok=True)
# Jinja2 template for the Hugo front matter written at the top of every
# generated index file. You need to adapt this for your own use case.
#
# The template reads attributes of a single ``frontmatter`` mapping:
#   - title, date, summary, slug, lang, category are emitted as quoted scalars;
#     the template itself does not escape quotes inside the values.
#   - author, tags and translator are iterated element by element and emitted
#     as quoted list items.
#   - featured_image and translators are only emitted when the corresponding
#     value is truthy.
#   - draft is always written as False.
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""
# Compile the template once from the in-memory string above; it is rendered
# with ``template.render(frontmatter=...)`` for each converted article.
template = jinja2.Environment(loader=jinja2.BaseLoader()).from_string(frontmatter_template)
# Pelican metadata keys that are carried over, in the order they are returned.
_KNOWN_FIELDS = (
    'title', 'date', 'author', 'category', 'tags',
    'slug', 'lang', 'summary', 'status', 'translator',
)
# Fields whose value is a comma-separated list.
_LIST_FIELDS = ('tags', 'author', 'translator')
# Fields whose double quotes are backslash-escaped so they can be embedded in
# a double-quoted front matter value.
_QUOTED_FIELDS = ('title', 'summary')


def parse_front_matter(article):
    """Split a Pelican article into its metadata and its body.

    The metadata block is the run of lines before the first blank line, e.g.::

        Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
        Date: 2010-03-25
        Author: Kris De Decker
        Category: Obsolete Technology
        Tags: human power
        Slug: history-of-human-powered-cranes
        Lang: en
        Summary: The only advantage that fossil-fuelled powered cranes ...
        Status: published

    Args:
        article: The article as a sequence of lines without line endings.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` is a dict keyed by
        the lower-cased field name and only contains fields that were present
        with a non-empty value; ``tags``, ``author`` and ``translator`` are
        lists of stripped strings, and double quotes in ``title`` and
        ``summary`` are escaped as ``\\"``. ``body`` is the article with the
        recognised metadata lines removed, joined with newlines. Header lines
        that are not recognised are left in the body so nothing is silently
        dropped.
    """
    found = {}
    body_lines = []
    in_header = True
    for line in article:
        if in_header:
            if not line.strip():
                # The first blank line ends the metadata block; everything
                # after it is body, even if it looks like "Key: value".
                in_header = False
            else:
                # Split on the first colon only, so values that themselves
                # contain ": " (such as titles with a subtitle) stay intact.
                key, colon, value = line.partition(':')
                field = key.lower()
                if colon and field in _KNOWN_FIELDS:
                    value = value.strip()
                    if value:
                        found[field] = value
                    continue  # metadata line: never part of the body
        body_lines.append(line)

    # Rebuild in a fixed field order, independent of the order in the file.
    frontmatter = {name: found[name] for name in _KNOWN_FIELDS if name in found}

    for name in _QUOTED_FIELDS:
        if name in frontmatter:
            frontmatter[name] = frontmatter[name].replace('"', r'\"')

    for name in _LIST_FIELDS:
        if name in frontmatter:
            items = (item.strip() for item in frontmatter[name].split(','))
            frontmatter[name] = [item for item in items if item]

    return frontmatter, '\n'.join(body_lines)
# Matches a Markdown link whose target uses Pelican's "{filename}" syntax.
#   group 1: link text
#   group 2: target path following "{filename}"
#   group 3: anything after the path up to the closing ")", e.g. a link title
_FILENAME_LINK_RE = re.compile(r'\[([^\]]*)\]\(\{filename\}([^\s)]*)([^)]*)\)')


def resolve_file_links(parsed_article, article):
    """Rewrite Pelican ``{filename}`` links as Hugo ``ref`` shortcodes.

    ``[About]({filename}/page/about "About Us")`` becomes
    ``[About]({{< ref "/page/about" >}} "About Us")``. Every such link in the
    text is rewritten, including several on one line.

    Args:
        parsed_article: The article body as a single string.
        article: The article as a list of lines. Kept for backward
            compatibility; the rewrite works on ``parsed_article`` directly.

    Returns:
        ``parsed_article`` with all ``{filename}`` links rewritten.
    """
    def _to_ref(match):
        text, target, rest = match.groups()
        # Plain concatenation keeps the literal "{{<" / ">}}" delimiters
        # intact; str.format would collapse the doubled braces.
        return '[' + text + ']({{< ref "' + target + '" >}}' + rest + ')'

    return _FILENAME_LINK_RE.sub(_to_ref, parsed_article)
def _image_targets(line):
    """Yield the target path of every ``](/images/...)`` link in *line*."""
    marker = "](/images/"
    start = line.find(marker)
    while start != -1:
        begin = start + 2  # skip "](" so the target starts at its leading "/"
        end = line.find(")", begin)
        if end == -1:
            return  # unterminated link: nothing more to extract on this line
        # Drop an optional link title that follows the path after whitespace.
        yield line[begin:end].split()[0]
        start = line.find(marker, end)


# Convert every Markdown file below post_dir into a Hugo page bundle:
# <hugo_content_dir>/<slug>/index[.<lang>].md plus the images it references.
for root, _dirs, files in os.walk(post_dir):
    for file_name in files:
        if not file_name.endswith('.md'):
            continue
        article_path = os.path.join(root, file_name)
        with open(article_path, encoding="utf-8") as source:
            article = source.read().splitlines()
        frontmatter, parsed_article = parse_front_matter(article)

        # Without a slug, fall back to the bare file name (not the full source
        # path) so the bundle is always created below hugo_content_dir.
        bundle_name = frontmatter.get('slug') or os.path.splitext(file_name)[0]
        page_bundle = os.path.join(hugo_content_dir, bundle_name)
        os.makedirs(page_bundle, exist_ok=True)

        # NOTE: "{filename}" links to other posts are not rewritten here;
        # resolve_file_links(parsed_article, article) can be enabled for that.

        # Copy all referenced images into the page bundle.
        for line in article:
            for image in _image_targets(line):
                image_name = os.path.basename(image)
                # image starts with "/", which is dropped so the join stays
                # relative to base_content_dir.
                image_source_path = os.path.join(base_content_dir, image[1:])
                image_dest_path = os.path.join(page_bundle, image_name)
                if not os.path.exists(image_dest_path):
                    try:
                        shutil.copyfile(image_source_path, image_dest_path)
                    except OSError as e:
                        # Best effort: a missing image must not abort the run.
                        print("failed to copy file", e)
                # Replace the old image path with one relative to the bundle.
                parsed_article = parsed_article.replace(image, image_name)
                # The first image of the article becomes its featured image.
                frontmatter.setdefault('featured_image', image_name)

        # Write the article into the page bundle; translations are told apart
        # by the language code in the file name.
        if 'lang' in frontmatter:
            index_name = '{}.{}.{}'.format('index', frontmatter['lang'], 'md')
        else:
            index_name = 'index.md'
        fp = os.path.join(page_bundle, index_name)
        with open(fp, 'w', encoding="utf-8") as f:
            headers = template.render(frontmatter=frontmatter)
            f.write(headers + parsed_article)