Varia library working group XPPL.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

24 lines
731 B

import logging

import PyPDF2
def get_text(file_path, filename):
    """Write the text of every page of a PDF to ``app/uploads/<filename>.txt``.

    Args:
        file_path: Path of the PDF file to read (opened in binary mode).
        filename: Base name of the output file; ``.txt`` is appended and the
            file is created under ``app/uploads/``.

    Raises:
        Nothing is caught here: any error from ``open`` or from PyPDF2
        propagates to the caller.
    """
    write_txt = "app/uploads/" + filename + '.txt'
    with open(file_path, 'rb') as pdf_file, open(write_txt, 'w') as text_file:
        # NOTE(review): PdfFileReader/getNumPages/getPage/extractText are the
        # pre-3.0 PyPDF2 names; kept unchanged to match the installed version.
        reader = PyPDF2.PdfFileReader(pdf_file)
        for page_number in range(reader.getNumPages()):
            page_content = reader.getPage(page_number).extractText()
            # Persist the page text; without this write the output file is
            # created but always left empty.
            text_file.write(page_content)
def extract_text(file_path, filename):
    """Extract a PDF's text, falling back to an empty text file on failure.

    Delegates to ``get_text(file_path, filename)``. If that call raises any
    ``Exception``, the error is logged and an empty ``<filename>.txt`` is
    created instead, so this function itself does not propagate the failure.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name (without ``.txt``) of the text file to produce.
    """
    try:
        get_text(file_path, filename)
    except Exception:
        # Best-effort: swallow the error but leave a trace of what went wrong.
        logging.getLogger(__name__).exception(
            "Text extraction failed for %s; writing an empty text file",
            file_path,
        )
        # NOTE(review): this path is relative to the working directory, while
        # get_text writes under "app/uploads/" -- confirm this is intended.
        with open(filename + '.txt', 'w') as text_file:
            text_file.write("")