rra
6 months ago
commit
8628cc66c5
2 changed files with 191 additions and 0 deletions
@ -0,0 +1,5 @@ |
|||
A script to facilitate migrating an existing Pelican site to Hugo. |
|||
|
|||
It has much of the basic logic, but you probably need to adapt the specific metadata/frontmatter tags to your own situation. |
|||
|
|||
Initially written for <https://test.roelof.info> and <https://solar.lowtechmagazine.com> |
@ -0,0 +1,186 @@ |
|||
#ltm-pelican-to-hugo-content-converter |
|||
# © 2022 Roel Roscam Abbing, released as GPLv3 |
|||
|
|||
# converts a Pelican post directory structure to Hugo Page Bundles |
|||
# by taking Pelican post slug, creating Hugo page bundle (a directory) |
|||
# taking Pelican post, creating slug/index.lang.md based on it |
|||
# taking Pelican post metadata and creating Hugo front matter for slug/index.lang.md |
|||
# finding all media associated with the Pelican post and adding it to the pagebundle. |
|||
# updating the links in the index.lang.md to be relative to the files |
|||
# updating the references to other pelican posts to other hugo posts |
|||
# adding all translated versions of a Pelican post as slug/index.lang.md |
|||
|
|||
# N.B. this tool will do 95% of the work but you will need to manually fix a few individual files. |
|||
|
|||
|
|||
import sys |
|||
import os |
|||
import shutil |
|||
import jinja2 |
|||
|
|||
# Root content directory of the Pelican repository. Image links of the form
# "/images/..." found in posts are resolved relative to this directory.
base_content_dir = "/home/user/pelican-site/content"

# Directory of the Pelican repository that holds the posts to convert.
post_dir = "/home/user/pelican-site/content/posts/"

# Directory of the Hugo repository that receives one page bundle per post.
hugo_content_dir = "/home/user/new_hugo_site/content/posts/"

# makedirs (rather than mkdir) also creates missing parent directories such
# as "content/", and exist_ok=True makes re-running the script harmless.
os.makedirs(hugo_content_dir, exist_ok=True)
|||
|
|||
|
|||
# Hugo front matter, rendered once per converted post from the `frontmatter`
# dict. Adapt the fields below to your own use case.
frontmatter_template = """---
title: "{{ frontmatter.title }}"
date: "{{ frontmatter.date }}"
summary: "{{ frontmatter.summary }}"
slug: "{{ frontmatter.slug }}"
lang: "{{ frontmatter.lang }}"
authors: [{% for author in frontmatter.author %}{% if not loop.last %}"{{author}}",{% else %}"{{author}}"{% endif %} {%endfor%}]
categories: ["{{ frontmatter.category }}"]
tags: [{% for tag in frontmatter.tags %}{% if not loop.last %}"{{tag}}",{% else %}"{{tag}}"{% endif %} {%endfor%}]
{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
{% if frontmatter.translator %}translators: [{% for translator in frontmatter.translator %}{% if not loop.last %}"{{translator}}",{% else %}"{{translator}}"{% endif %}{%endfor%}]{% endif %}
draft: False
---
"""

# Compile the template once; the string is supplied directly, so the base
# loader is sufficient.
_template_env = jinja2.Environment(loader=jinja2.BaseLoader())
template = _template_env.from_string(frontmatter_template)
|||
|
|||
|
|||
def parse_front_matter(article):
    """Split a Pelican post into its metadata and its body text.

    A Pelican Markdown post starts with a header of ``Key: value`` lines,
    for example::

        Title: The Sky is the Limit: Human-Powered Cranes and Lifting Devices
        Date: 2010-03-25
        Author: Kris De Decker
        Category: Obsolete Technology
        Tags: human power
        Slug: history-of-human-powered-cranes
        Lang: en
        Summary: The only advantage that fossil-fuelled powered cranes ...
        Status: published

    Only the header (the leading lines up to the first blank line) is
    inspected, so body lines that happen to start with a metadata keyword
    cannot overwrite the real metadata.

    Args:
        article: the post as a list of lines without line endings.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` maps the lower-cased
        names of the recognised, non-empty fields to their values: ``tags``,
        ``author`` and ``translator`` are lists of stripped strings, every
        other field is a string. Double quotes in ``title`` and ``summary``
        are backslash-escaped because the front matter template emits both
        inside double-quoted strings. ``body`` is the remaining lines joined
        with newlines.
    """
    known_fields = (
        'title', 'date', 'author', 'category', 'tags',
        'slug', 'lang', 'summary', 'status', 'translator',
    )
    list_fields = ('tags', 'author', 'translator')
    quoted_fields = ('title', 'summary')

    collected = {}
    header_started = False
    header_end = 0  # index of the first line that belongs to the body
    for index, line in enumerate(article):
        if not line.strip():
            if header_started:
                # The first blank line after the header ends the metadata.
                break
            continue  # tolerate blank lines before the header
        header_started = True

        # Split on the FIRST colon only, so values that contain ": "
        # themselves (titles, timestamps) are kept intact.
        key, separator, value = line.partition(':')
        key = key.strip().lower()
        if not separator or key not in known_fields:
            continue
        collected[key] = value.strip()
        header_end = index + 1

    # Drop fields that were present but empty.
    frontmatter = {key: value for key, value in collected.items() if value}

    for field in quoted_fields:
        if field in frontmatter:
            frontmatter[field] = frontmatter[field].replace('"', r'\"')

    for field in list_fields:
        if field in frontmatter:
            items = frontmatter[field].split(',')
            frontmatter[field] = [item.strip() for item in items if item.strip()]

    # Cut at the last recognised header line rather than by counting fields,
    # so empty fields do not leave header lines behind in the body.
    return frontmatter, '\n'.join(article[header_end:])
|||
|
|||
def resolve_file_links(parsed_article, article):
    """Rewrite Pelican ``{filename}`` links as Hugo ``ref`` shortcodes.

    ``[About]({filename}/page/about.md "About Us")`` becomes
    ``[About]({{< ref "/page/about.md" >}} "About Us")``.

    Every matching link in the text is rewritten, including several on the
    same line. An optional link title stays outside the shortcode.

    Args:
        parsed_article: the post body as a single string.
        article: the post as a list of lines. Unused; kept so existing
            callers that pass it keep working.

    Returns:
        ``parsed_article`` with all ``{filename}`` links rewritten.
    """
    import re  # local import: only this function needs it

    # [description]({filename}target optional-title)
    link_pattern = re.compile(r'\[([^\]]*)\]\(\{filename\}([^)]*)\)')

    def to_hugo_ref(match):
        description = match.group(1)
        target, _, title = match.group(2).strip().partition(' ')
        title = title.strip()
        suffix = ' ' + title if title else ''
        # Braces are doubled because str.format would otherwise collapse
        # "{{" to "{" and emit a broken shortcode.
        return '[{}]({{{{< ref "{}" >}}}}{})'.format(description, target, suffix)

    return link_pattern.sub(to_hugo_ref, parsed_article)
|||
|
|||
|
|||
# Walk the Pelican posts directory and turn every Markdown file into a Hugo
# page bundle: <hugo_content_dir>/<slug>/index[.<lang>].md plus its images.
for root, dirs, files in os.walk(post_dir):
    for name in files:
        if not name.endswith('.md'):
            continue

        article_path = os.path.join(root, name)
        with open(article_path, encoding='utf-8') as source:
            article = source.read().splitlines()

        frontmatter, parsed_article = parse_front_matter(article)

        # The bundle is named after the slug, falling back to the bare file
        # name. Using the full source path here would make os.path.join
        # discard hugo_content_dir and create the bundle in the Pelican tree.
        bundle_name = frontmatter.get('slug') or os.path.splitext(name)[0]
        page_bundle = os.path.join(hugo_content_dir, bundle_name)
        os.makedirs(page_bundle, exist_ok=True)

        # Link rewriting was disabled in the original script; left disabled.
        #parsed_article = resolve_file_links(parsed_article, article)

        # Copy every image referenced as "](/images/...)" into the bundle and
        # point the post at the local copy.
        first_image = False
        image_marker = '](/images/'
        for line in article:
            start = line.find(image_marker)
            while start != -1:
                begin = start + 2  # skip the "](" in front of the path
                end = line.find(')', begin)
                if end == -1:
                    break  # unterminated link, nothing usable on this line
                # Drop an optional link title that follows the path.
                image = line[begin:end].split()[0]
                image_name = os.path.basename(image)
                image_source_path = os.path.join(base_content_dir, image[1:])
                image_dest_path = os.path.join(page_bundle, image_name)
                if not os.path.exists(image_dest_path):
                    try:
                        shutil.copyfile(image_source_path, image_dest_path)
                    except OSError as e:
                        # Best effort: report and carry on with the post.
                        print("failed to copy file", e)
                # Replace the old image path with the bundle-relative one.
                parsed_article = parsed_article.replace(image, image_name)
                if not first_image:
                    # The first image of the post becomes its featured image.
                    frontmatter['featured_image'] = image_name
                    first_image = True
                start = line.find(image_marker, end)

        # Translations share one bundle and differ by the language suffix.
        if 'lang' in frontmatter:
            index_name = '{}.{}.{}'.format('index', frontmatter['lang'], 'md')
        else:
            index_name = 'index.md'
        fp = os.path.join(page_bundle, index_name)

        headers = template.render(frontmatter=frontmatter)
        with open(fp, 'w', encoding='utf-8') as f:
            f.write(headers + parsed_article)
|||
|
|||
|
|||
|
Loading…
Reference in new issue