Varia library working group XPPL.
https://gitea.xpub.nl/XPUB/XPPL
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
24 lines
731 B
24 lines
731 B
6 years ago
|
import logging

import PyPDF2
|
||
|
|
||
|
|
||
|
def get_text(file_path, filename):
    """Write the extracted text of every page of a PDF to a text file.

    Pages are processed in order and their text is appended, one page after
    another with no separator, to ``app/uploads/<filename>.txt``.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file (``.txt`` is appended).

    Raises:
        Any exception raised by ``open`` or by PyPDF2 while reading the PDF
        propagates to the caller; nothing is caught here.
    """
    output_path = "app/uploads/" + filename + '.txt'

    # The output is opened with an explicit UTF-8 encoding so that non-ASCII
    # text extracted from the PDF does not depend on the platform's default
    # locale encoding (which can raise UnicodeEncodeError).
    with open(file_path, 'rb') as pdf_file, \
            open(output_path, 'w', encoding='utf-8') as text_file:
        # NOTE(review): PdfFileReader/getNumPages/getPage/extractText are the
        # legacy PyPDF2 names; kept as-is because the installed version is
        # not visible from here.
        reader = PyPDF2.PdfFileReader(pdf_file)
        for page_number in range(reader.getNumPages()):
            page = reader.getPage(page_number)
            text_file.write(page.extractText())
|
||
|
|
||
|
|
||
|
def extract_text(file_path, filename):
    """Extract a PDF's text, leaving an empty text file if extraction fails.

    Delegates to ``get_text``. When that call raises, the failure is logged
    and an empty ``app/uploads/<filename>.txt`` is written instead, so the
    text file ends up in the same location whether or not extraction worked.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file (``.txt`` is appended).
    """
    try:
        get_text(file_path, filename)
    except Exception:
        # Best-effort by design: a PDF that cannot be parsed must not abort
        # the caller, but the reason should still be visible in the logs.
        logging.getLogger(__name__).exception(
            "Text extraction failed for %s", file_path
        )
        # Same destination as the success path; opening in 'w' mode also
        # truncates any partial output left behind by the failed attempt.
        with open("app/uploads/" + filename + '.txt', 'w') as text_file:
            text_file.write("")
|