Varia library working group XPPL. https://gitea.xpub.nl/XPUB/XPPL
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

24 lines
710 B

import PyPDF2
def get_text(file_path, filename):
read_pdf =file_path
with open(read_pdf,'rb') as pdf_file, open("app/uploads/"+filename+'.txt', 'w') as text_file:
read_pdf = PyPDF2.PdfFileReader(pdf_file)
number_of_pages = read_pdf.getNumPages()
for page_number in range(number_of_pages): # use xrange in Py2
page = read_pdf.getPage(page_number)
page_content = page.extractText()
text_file.write(page_content)
def extract_text(file_path, filename):
try:
get_text(file_path, filename)
except:
with open(filename+'.txt', 'w') as text_file:
page_content = ""
text_file.write(page_content)