initial commit
commit 8628cc66c5
README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
A script to facilitate the migration from an existing Pelican site to one in Hugo.

It has much of the basic logic, but you will probably need to adapt the specific metadata/frontmatter tags to your own situation.

Initially written for <https://test.roelof.info> and <https://solar.lowtechmagazine.com>
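
To run it, point the `base_content_dir`, `post_dir` and `hugo_content_dir` variables at the top of `convert_to_hugo.py` at your own Pelican and Hugo checkouts, then run the script directly, for example with `python3 convert_to_hugo.py` (it takes no arguments).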
convert_to_hugo.py (new file, 186 lines)
@@ -0,0 +1,186 @@
#ltm-pelican-to-hugo-content-converter
# © 2022 Roel Roscam Abbing, released as GPLv3

# converts a Pelican post directory structure to Hugo Page Bundles
# by taking Pelican post slug, creating Hugo page bundle (a directory)
# taking Pelican post, creating slug/index.lang.md based on it
# taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md
# finding all media associated with the Pelican post and adding it to the pagebundle.
# updating the links in the index.lang.md to be relative to the files
# updating the references to other pelican posts to other hugo posts
# adding all translated versions of a Pelican post as slug/index.lang.md

# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files.


import sys
import os
import shutil
import jinja2

# the content dir of the pelican repo
base_content_dir = "/home/user/pelican-site/content"

# the posts dir of the pelican repo
post_dir = "/home/user/pelican-site/content/posts/"

# the posts dir of the hugo repo
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"

if not os.path.exists(hugo_content_dir):
    os.mkdir(hugo_content_dir)

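# Note: os.mkdir only creates the final path component; if the parent directory may not
# exist yet, os.makedirs(hugo_content_dir, exist_ok=True) is a safer alternative.
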
# You need to adapt this for your own use case:
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""

template = jinja2.Environment(loader=jinja2.BaseLoader()).from_string(frontmatter_template)

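# For illustration, assuming a frontmatter dict like the one parse_front_matter() below
# builds for its sample post, template.render(frontmatter=frontmatter) produces YAML
# front matter roughly like:
#
#   ---
#   title: "The Sky is the Limit: Human-Powered Cranes and Lifting Devices"
#   date: "2010-03-25"
#   slug: "history-of-human-powered-cranes"
#   lang: "en"
#   authors: ["Kris De Decker"]
#   categories: ["Obsolete Technology"]
#   tags: ["human power"]
#   draft: False
#   ---
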
def parse_front_matter(article):
    #Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
    #Date: 2010-03-25
    #Author: Kris De Decker
    #Category: Obsolete Technology
    #Tags: human power
    #Slug: history-of-human-powered-cranes
    #Lang: en
    #Summary: The only advantage that fossil-fuelled powered cranes have brought us, is a higher lifting speed
    #Status: published

    parsed_article = article

    frontmatter = {
        'title': '',
        'date': '',
        'author': '',
        'category': '',
        'tags': '',
        'slug': '',
        'lang': '',
        'summary': '',
        'status': '',
        'translator': '',
        'featured_image': ''
    }

    metadatafields = {
        'Title: ': 'title',
        'Date: ': 'date',
        'Author: ': 'author',
        'Category: ': 'category',
        'Tags: ': 'tags',
        'Slug: ': 'slug',
        'Lang: ': 'lang',
        'Summary: ': 'summary',
        'Status: ': 'status',
        'Translator: ': 'translator'}

    for l in article:
        if l.startswith(("Title:", "Date:", "Author:", "Category:", "Tags:", "Slug:", "Lang:", "Summary:", "Status:", "Translator:")):
            # split only on the first ": " so values that themselves contain a colon
            # (like the sample title above) are not truncated
            field, content = l.split(": ", 1)
            frontmatter[field.lower()] = content.strip()

    # remove frontmatter items that are empty
    frontmatter2 = frontmatter.copy()
    for v in frontmatter2.keys():
        if not frontmatter[v]:
            frontmatter.pop(v)

    if 'tags' in frontmatter.keys():
        frontmatter['tags'] = frontmatter['tags'].split(',')

    if 'summary' in frontmatter.keys():
        summary = frontmatter['summary']
        summary = summary.replace('"', r'\"')
        frontmatter['summary'] = summary

    if 'author' in frontmatter.keys():
        frontmatter['author'] = frontmatter['author'].split(',')

    if 'translator' in frontmatter.keys():
        frontmatter['translator'] = frontmatter['translator'].split(',')

    # drop the metadata header from the article body: one line per non-empty field
    parsed_article = parsed_article[len(frontmatter.keys()):]


    return frontmatter, '\n'.join(parsed_article)

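# For illustration: for a post whose header matches the sample metadata in the comments
# above, parse_front_matter() returns roughly
#   ({'title': 'The Sky is the Limit: Human-Powered Cranes and Lifting Devices',
#     'date': '2010-03-25', 'author': ['Kris De Decker'], 'category': 'Obsolete Technology',
#     'tags': ['human power'], 'slug': 'history-of-human-powered-cranes', 'lang': 'en',
#     'summary': 'The only advantage that fossil-fuelled powered cranes have brought us, is a higher lifting speed',
#     'status': 'published'},
#    <the remaining article body joined into one string>)
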
def resolve_file_links(parsed_article, article):
    # rewrites Pelican {filename} links into Hugo ref links, e.g.
    #[About]({{< ref "/page/about" >}} "About Us")
    # this is VERY slow but seems to work well enough?
    for line in article:
        if r"({filename}" in line:
            fn = line[line.find('({')+1:line.find(')')]
            desc = line[line.find('[')+1:line.find(']')]
            # use replace rather than strip: strip("{filename}") would remove any of
            # those characters from both ends of the path, not the literal prefix
            fn = fn.replace("{filename}", "")
            link = line[line.find('[')+1:line.find(')')]
            # the quadrupled braces render as literal {{ and }}, which Hugo's ref
            # shortcode needs
            ref = "{}]({{{{< ref '{}' >}}}}".format(desc, fn)
            parsed_article = parsed_article.replace(link, ref)
    return parsed_article


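# For illustration: with resolve_file_links() enabled, a Pelican-style link such as
#   [About]({filename}/pages/about.md)
# in a post body would be rewritten to the Hugo ref shortcode form
#   [About]({{< ref '/pages/about.md' >}})
# (the /pages/about.md path is hypothetical, used only as an example).
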
for root, dirs, files in os.walk(post_dir):

    for i in files:
        i = os.path.join(root, i)
        if i.endswith('.md'):
            fn, ext = os.path.splitext(i)
            article_path = os.path.join(post_dir, i)
            article = open(article_path).read().splitlines()
            new_article = open(article_path).read()
            frontmatter, parsed_article = parse_front_matter(article)
            if 'slug' in frontmatter.keys():
                page_bundle = os.path.join(hugo_content_dir, frontmatter['slug'])
            else:
                # fall back to the file name (without extension) when a post has no slug;
                # use the basename so the full source path is not joined into the Hugo dir
                page_bundle = os.path.join(hugo_content_dir, os.path.basename(fn))
            if not os.path.exists(page_bundle):
                os.mkdir(page_bundle)

            # copy all images to pagebundle
            first_image = False
            #parsed_article = resolve_file_links(parsed_article, article)
            for line in article:
                if "](/images/" in line:
                    image = line[line.find('(')+1:line.find(')')]
                    image_source_path = os.path.join(base_content_dir, image[1:])
                    image_dest_path = os.path.join(page_bundle, os.path.basename(image))
                    if not os.path.exists(image_dest_path):
                        try:
                            shutil.copyfile(image_source_path, image_dest_path)
                        except Exception as e:
                            print("failed to copy file", e)
                    # replace the old image paths with new relative ones
                    parsed_article = parsed_article.replace(image, os.path.basename(image))
                    if not first_image:
                        frontmatter['featured_image'] = os.path.basename(image)
                        first_image = True

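            # For illustration: a body line such as
            #   ![A human powered crane](/images/cranes/treadwheel.jpg)
            # ends up in index.lang.md as
            #   ![A human powered crane](treadwheel.jpg)
            # with treadwheel.jpg copied into the page bundle (the image path and file
            # name here are hypothetical).
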
            # copy article content to page bundle
            if 'lang' in frontmatter.keys():  # handle translations
                fp = os.path.join(page_bundle, '{}.{}.{}'.format('index', frontmatter['lang'], 'md'))
            else:
                fp = os.path.join(page_bundle, '{}'.format('index.md'))
            with open(fp, 'w') as f:
                headers = template.render(frontmatter=frontmatter)

                f.write(headers + parsed_article)
                #print(parsed_article[:15])
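
# For illustration: after a run, each converted post ends up as a Hugo page bundle, e.g.
#   <hugo_content_dir>/history-of-human-powered-cranes/index.en.md
#   <hugo_content_dir>/history-of-human-powered-cranes/treadwheel.jpg
# where the directory name comes from the post's Slug: field and the language suffix
# from its Lang: field (the image file name is hypothetical).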