Varia library working group XPPL. https://gitea.xpub.nl/XPUB/XPPL
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

23 lines
732 B

import logging
import os

import PyPDF2
def get_text(file_path, filename):
    """Extract the text of every page of a PDF into a ``.txt`` file.

    The text of each page is written, in page order and without any
    separator, to ``xppl/uploads/<filename>.txt`` (relative to the current
    working directory).

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended.

    Raises:
        OSError: If the PDF cannot be opened or the output file cannot be
            created (for example when ``xppl/uploads/`` does not exist).
        Exception: Any parsing error raised by PyPDF2 propagates unchanged.
    """
    write_txt = "xppl/uploads/" + filename + '.txt'
    # Write UTF-8 explicitly: extracted PDF text routinely contains
    # non-ASCII characters, and the locale default encoding may reject them.
    with open(file_path, 'rb') as pdf_file, \
            open(write_txt, 'w', encoding='utf-8') as text_file:
        # Newer PyPDF2 releases expose PdfReader and refuse to instantiate
        # PdfFileReader; older releases only have PdfFileReader.
        reader_cls = getattr(PyPDF2, 'PdfReader', None) or PyPDF2.PdfFileReader
        reader = reader_cls(pdf_file)
        for page in reader.pages:
            # Same split for the page API: extract_text (new) vs extractText.
            extract = getattr(page, 'extract_text', None) or page.extractText
            # Guard against a page yielding no text object at all.
            text_file.write(extract() or "")
def extract_text(file_path, filename):
    """Best-effort wrapper around ``get_text`` that never leaves a gap.

    Calls ``get_text(file_path, filename)``. If that raises for any reason,
    the failure is logged and an empty ``xppl/uploads/<filename>.txt`` is
    written instead, i.e. at the same location ``get_text`` writes to.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended.

    Raises:
        OSError: Only if the empty placeholder file itself cannot be written.
    """
    try:
        get_text(file_path, filename)
    except Exception:
        # Deliberately broad: extraction is best-effort, but record why it
        # failed instead of swallowing the error silently.
        logging.getLogger(__name__).warning(
            "Text extraction failed for %s; writing empty text file",
            file_path,
            exc_info=True,
        )
        # Must match the path get_text writes to, so the placeholder ends up
        # next to the other extracted texts rather than in the working dir.
        fallback_path = "xppl/uploads/" + filename + '.txt'
        # A missing uploads directory may be the very reason get_text failed.
        os.makedirs(os.path.dirname(fallback_path), exist_ok=True)
        with open(fallback_path, 'w', encoding='utf-8') as text_file:
            text_file.write("")