varia.website/plugins/pdf/pdf.py

126 lines
3.9 KiB
Python
Raw Normal View History

2018-02-01 14:46:15 +01:00
# -*- coding: utf-8 -*-
'''
PDF Generator
-------------
The pdf plugin generates PDF files from reStructuredText and Markdown sources.
'''
from __future__ import unicode_literals, print_function
from io import open
from pelican import signals
from pelican.generators import Generator
from pelican.readers import MarkdownReader
import os
import logging
logger = logging.getLogger(__name__)

import xhtml2pdf.util

# Alias xhtml2pdf's ``PyPDF2`` attribute under the ``pyPdf`` name when the
# latter is absent from ``xhtml2pdf.util``.
# NOTE(review): presumably required by the rst2pdf import that follows this
# block -- confirm which consumer still looks up ``xhtml2pdf.util.pyPdf``.
if 'pyPdf' not in dir(xhtml2pdf.util):
    try:
        from xhtml2pdf.util import PyPDF2
    except ImportError:
        # Log a readable hint, then let the ImportError propagate.
        logger.error('Failed to monkeypatch xhtml2pdf. '
                     'You have missing dependencies')
        raise
    else:
        xhtml2pdf.util.pyPdf = PyPDF2
from rst2pdf.createpdf import RstToPdf
class PdfGenerator(Generator):
    """Generate PDFs in the output dir, for all articles and pages.

    reStructuredText sources are handed to rst2pdf unchanged.  Markdown
    sources are first read with Pelican's ``MarkdownReader`` and the
    resulting text is embedded in a generated reStructuredText document
    through a ``.. raw:: html`` directive.
    """

    # Markdown metadata fields rendered as a field list in the PDF header.
    # Every other field (apart from the title) is left out.
    supported_md_fields = ['date']

    def __init__(self, *args, **kwargs):
        """Set up the generator and the shared ``RstToPdf`` instance.

        The optional ``PDF_STYLE_PATH`` and ``PDF_STYLE`` settings are each
        wrapped in a one-element list and passed to rst2pdf as
        ``style_path`` and ``stylesheets``; an empty list is passed when a
        setting is absent.
        """
        super(PdfGenerator, self).__init__(*args, **kwargs)

        if 'PDF_STYLE_PATH' in self.settings:
            pdf_style_path = [self.settings['PDF_STYLE_PATH']]
        else:
            pdf_style_path = []

        if 'PDF_STYLE' in self.settings:
            pdf_style = [self.settings['PDF_STYLE']]
        else:
            pdf_style = []

        self.pdfcreator = RstToPdf(breakside=0,
                                   stylesheets=pdf_style,
                                   style_path=pdf_style_path,
                                   raw_html=True)

    def _markdown_to_rst(self, text, meta):
        """Return ``(header, body)`` wrapping Markdown reader output as reST.

        ``header`` holds the title (if any), the supported metadata fields
        and the opening of a ``raw:: html`` directive; ``body`` is ``text``
        indented to sit inside that directive and reduced to ASCII.
        """
        header = ''
        if 'title' in meta:
            title = meta['title']
            header = title + '\n' + '#' * len(title) + '\n\n'

        # We can't support all fields, so we keep only the ones that will
        # look good.  Filtering into a new list (instead of deleting keys
        # while iterating the dict) avoids a RuntimeError on Python 3.
        fields = [':%s: %s' % (key, meta[key])
                  for key in meta
                  if key != 'title' and key in self.supported_md_fields]
        header += '\n'.join(fields)
        header += '\n\n.. raw:: html\n\n\t'

        # Indent every line so it belongs to the raw directive's content.
        body = text.replace('\n', '\n\t')

        # rst2pdf casts the text to str and will break if it finds
        # non-escaped characters. Here we nicely escape them to XML/HTML
        # entities before proceeding.  Decoding back keeps ``body`` a text
        # string so it can be concatenated with ``header`` on Python 3.
        body = body.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        return header, body

    def _create_pdf(self, obj, output_path):
        """Write ``<obj.slug>.pdf`` into ``output_path`` for one content object.

        ``obj`` must provide ``slug`` and ``source_path``.  Sources that are
        neither ``.rst`` nor an enabled Markdown extension are skipped with
        a warning.
        """
        filename = obj.slug + '.pdf'
        output_pdf = os.path.join(output_path, filename)

        mdreader = MarkdownReader(self.settings)
        _, ext = os.path.splitext(obj.source_path)

        if ext == '.rst':
            with open(obj.source_path, encoding='utf-8') as f:
                text = f.read()
            header = ''
        elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
            text, meta = mdreader.read(obj.source_path)
            header, text = self._markdown_to_rst(text, meta)
        else:
            # We don't support this format
            logger.warning('Ignoring unsupported file %s', obj.source_path)
            return

        logger.info(' [ok] writing %s', output_pdf)
        self.pdfcreator.createPdf(text=(header + text),
                                  output=output_pdf)

    def generate_context(self):
        """Do nothing: this generator adds nothing to the shared context."""
        pass

    def generate_output(self, writer=None):
        """Create the ``pdf`` output folder and write one PDF per item.

        Iterates ``self.context['articles']`` and then
        ``self.context['pages']``.  If the folder cannot be created, the
        error is logged and no PDF is generated.
        """
        # we don't use the writer passed as argument here
        # since we write our own files
        logger.info(' Generating PDF files...')
        pdf_path = os.path.join(self.output_path, 'pdf')

        if not os.path.isdir(pdf_path):
            try:
                # makedirs also creates a missing parent output directory.
                os.makedirs(pdf_path)
            except OSError:
                # Tolerate the folder having appeared in the meantime;
                # anything else means there is nowhere to write to.
                if not os.path.isdir(pdf_path):
                    logger.error(
                        "Couldn't create the pdf output folder in %s",
                        pdf_path)
                    return

        for article in self.context['articles']:
            self._create_pdf(article, pdf_path)

        for page in self.context['pages']:
            self._create_pdf(page, pdf_path)
def get_generators(generators):
    """Return the ``PdfGenerator`` class.

    The ``generators`` argument is accepted but not used.
    """
    return PdfGenerator
def register():
    """Connect ``get_generators`` to the ``signals.get_generators`` signal."""
    generators_signal = signals.get_generators
    generators_signal.connect(get_generators)