Varia library working group XPPL.
https://gitea.xpub.nl/XPUB/XPPL
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
731 B
23 lines
731 B
import PyPDF2
|
|
|
|
|
|
def get_text(file_path, filename):
    """Extract the text of a PDF into ``app/uploads/<filename>.txt``.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended and
            the file is created inside ``app/uploads/``.

    The output file is opened (and therefore truncated) before the PDF is
    parsed, so it exists even when parsing raises part-way through. Errors
    from opening either file or from PyPDF2 propagate to the caller.
    """
    write_txt = "app/uploads/" + filename + '.txt'

    # The encoding is pinned to UTF-8 so that non-ASCII characters in the
    # extracted text do not raise UnicodeEncodeError on platforms whose
    # default locale encoding cannot represent them.
    with open(file_path, 'rb') as pdf_file, \
            open(write_txt, 'w', encoding='utf-8') as text_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # Pages are written in order, one after another, with no separator.
        for page_number in range(reader.getNumPages()):
            page = reader.getPage(page_number)
            text_file.write(page.extractText())
|
|
|
|
|
|
def extract_text(file_path, filename):
    """Extract a PDF's text, falling back to an empty text file on failure.

    Delegates to ``get_text``. If that raises any ``Exception``, the error
    is logged and an empty ``app/uploads/<filename>.txt`` is written
    instead, so the text file exists at the same location either way.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended.
    """
    import logging

    try:
        get_text(file_path, filename)
    except Exception:
        # Best-effort by design: record the failure instead of hiding it,
        # then leave an empty placeholder where get_text would have
        # written its output (this also discards any partial extraction).
        logging.getLogger(__name__).exception(
            "Could not extract text from %s", file_path)
        with open("app/uploads/" + filename + '.txt', 'w') as text_file:
            text_file.write("")
|
|
|