You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
43 lines
1.6 KiB
43 lines
1.6 KiB
# Based on the code at https://iq.opengenus.org/pdf_to_image_in_python/
|
|
|
|
import pdf2image
|
|
from PIL import Image
|
|
import time
|
|
|
|
# DECLARE CONSTANTS
# Each value below is forwarded to pdf2image.convert_from_path() by pdftopil().
PDF_PATH = "scans/out.pdf"  # path of the PDF to convert
DPI = 72  # resolution of the rendered images
FIRST_PAGE = None  # first page to process; None passes no explicit bound
LAST_PAGE = None  # last page to process; None passes no explicit bound
FORMAT = 'jpg'  # image format requested from the conversion
THREAD_COUNT = 1  # number of threads used for the conversion
USERPWD = None  # password for the PDF; None when it is not protected
USE_CROPBOX = False  # True: use the crop box instead of the media box
STRICT = False  # True: report pdftoppm syntax errors as PDFSyntaxError
|
|
|
|
def pdftopil():
    """Read the PDF at PDF_PATH and convert it into a sequence of images.

    All settings come from the module-level constants and are forwarded to
    ``pdf2image.convert_from_path``:

    - ``PDF_PATH``: path to the PDF file.
    - ``DPI`` (``dpi``): resolution of the resulting images.
    - ``FIRST_PAGE`` (``first_page``): first page to be processed by pdftoppm.
    - ``LAST_PAGE`` (``last_page``): last page to be processed by pdftoppm.
    - ``FORMAT`` (``fmt``): output format of the pdftoppm conversion.
    - ``THREAD_COUNT`` (``thread_count``): how many threads are used for the
      conversion.
    - ``USERPWD`` (``userpw``): password used to unlock the PDF.
    - ``USE_CROPBOX`` (``use_cropbox``): use the crop box instead of the
      media box when converting.
    - ``STRICT`` (``strict``): surface pdftoppm syntax errors as the custom
      ``PDFSyntaxError`` type.

    The elapsed conversion time, in seconds, is printed to stdout.

    Returns:
        The images produced by ``pdf2image.convert_from_path``.
    """
    start_time = time.time()
    pil_images = pdf2image.convert_from_path(
        PDF_PATH,
        dpi=DPI,
        first_page=FIRST_PAGE,
        last_page=LAST_PAGE,
        fmt=FORMAT,
        thread_count=THREAD_COUNT,
        userpw=USERPWD,
        use_cropbox=USE_CROPBOX,
        strict=STRICT,
    )
    print("Time taken : " + str(time.time() - start_time))
    return pil_images
|
|
|
|
def save_images(pil_images, output_dir="split"):
    """Save every image as ``input<N>.jpg`` inside *output_dir*.

    Files are numbered from 1 in iteration order (``input1.jpg``,
    ``input2.jpg``, ...). The output directory is created when it does not
    exist yet, so a fresh checkout no longer fails on the first save.

    Args:
        pil_images: iterable of objects exposing ``save(path)``, such as the
            PIL images returned by ``pdftopil()``.
        output_dir: directory receiving the files; defaults to ``"split"``,
            the location that was previously hard-coded.
    """
    import os  # local import: nothing else in the file needs os

    # Saving into a missing directory fails, so make sure it exists first.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for page_number, image in enumerate(pil_images, 1):
        image.save(os.path.join(output_dir, "input%d.jpg" % page_number))
|
|
|
|
if __name__ == "__main__":
    # Script entry point: convert the configured PDF, then write the
    # resulting images to disk.
    pil_images = pdftopil()
    save_images(pil_images)
|
|
|