57 lines
1.7 KiB
Python
57 lines
1.7 KiB
Python
|
import PyPDF2
|
||
|
from wand.image import Image
|
||
|
import io
|
||
|
import os
|
||
|
|
||
|
|
||
|
def pdf_page_to_png(src_pdf, pagenum=0, resolution=72):
    """
    Render one page of a PDF as a PNG-format wand.image.Image.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take the page.
    :param int pagenum: Page index handed to ``src_pdf.getPage``
        (0 selects the first page).
    :param int resolution: Resolution for the resulting png in DPI.
    :returns: A new wand.image.Image whose format is png. The caller owns
        the image and should close it (for example by using it in a
        ``with`` block) once it has been saved.
    """
    # Copy the requested page into a standalone, single-page PDF so that the
    # rasteriser only ever sees that one page.
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    # Serialise the one-page PDF to memory and rewind so it can be read back.
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    # Image.convert() does not modify the image in place: it returns a
    # converted copy. Return that copy (previously it was discarded and the
    # PDF-format image was returned) and release the intermediate image.
    with Image(file=pdf_bytes, resolution=resolution) as pdf_img:
        return pdf_img.convert("png")
|
||
|
|
||
|
|
||
|
def get_cover(file_path, filename):
    """
    Render the first page of a PDF as cover images.

    Two files are written under ``app/uploads/cover/``:

    * ``<filename>_cover.png`` -- page 0 rendered at 300 DPI.
    * ``<filename>cover_small.png`` -- the same image after
      ``transform("", "200")`` (resize geometry ``"200"``).

    :param str file_path: Path of the PDF to read.
    :param str filename: Base name used to build the output file names.
    :returns: The name of the large cover file, ``<filename>_cover.png``
        (without the directory part).
    """
    print(file_path)

    big_filename = "app/uploads/cover/" + filename + "_cover.png"
    # NOTE(review): unlike the big cover there is no underscore before
    # "cover_small". Kept as-is because other code may look this path up;
    # confirm before normalising.
    small_filename = "app/uploads/cover/" + filename + "cover_small" + ".png"

    # Keep the source file open only while the page is being extracted, and
    # make sure both the file handle and the wand image are released.
    with open(file_path, "rb") as pdf_file:
        src_pdf = PyPDF2.PdfFileReader(pdf_file)

        # The cover is the first page of the document.
        with pdf_page_to_png(src_pdf, pagenum=0, resolution=300) as img:
            img.save(filename=big_filename)

            # Ensmallen: shrink the same image in place for the thumbnail.
            img.transform("", "200")
            img.save(filename=small_filename)

    return filename + "_cover.png"
|
||
|
|
||
|
#---
|
||
|
#epub
|
||
|
#https://ebooks.stackexchange.com/questions/6517/command-line-extraction-of-metadata-title-author-from-epub-file
|
||
|
#https://hackage.haskell.org/package/epub-tools
|
||
|
#http://stackoverflow.com/questions/9751475/extract-cover-image-from-chm-and-epub-files
|