xppl/app/extractText.py

24 lines
710 B
Python
Raw Normal View History

2018-06-10 12:15:07 +02:00
import PyPDF2
def get_text(file_path, filename):
    """Extract the text of every page of a PDF into a text file.

    The PDF at ``file_path`` is read with PyPDF2 and the text extracted
    from each page is written, in page order, to
    ``app/uploads/<filename>.txt`` (relative to the current working
    directory). The output file is created/truncated right after the PDF
    has been opened, so it can be left empty or partial if parsing fails
    part-way through.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended.

    Raises:
        OSError: If the PDF cannot be opened or the output file cannot be
            created (e.g. ``FileNotFoundError``).
        Exception: Any error raised by PyPDF2 while parsing or extracting
            text propagates unchanged.
    """
    output_path = "app/uploads/" + filename + '.txt'
    # Pin the output encoding: extracted PDF text routinely contains
    # non-ASCII characters, and the platform default encoding (ASCII/cp1252
    # under some locales) would raise UnicodeEncodeError on write.
    with open(file_path, 'rb') as pdf_file, \
            open(output_path, 'w', encoding='utf-8') as text_file:
        # NOTE(review): PdfFileReader/getNumPages/getPage/extractText are the
        # legacy PyPDF2 names; kept as-is to match the version this file
        # was written against.
        reader = PyPDF2.PdfFileReader(pdf_file)
        for page_number in range(reader.getNumPages()):
            page = reader.getPage(page_number)
            text_file.write(page.extractText())
def extract_text(file_path, filename):
    """Extract a PDF's text, falling back to an empty text file on failure.

    Calls ``get_text(file_path, filename)``. If that raises, the error is
    logged and an empty ``app/uploads/<filename>.txt`` is written instead,
    so a failed extraction leaves an empty file at the same location a
    successful one would have used.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name of the output file; ``.txt`` is appended.

    Raises:
        OSError: If the empty fallback file itself cannot be written.
    """
    import logging  # function-scope import keeps this block self-contained

    try:
        get_text(file_path, filename)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        logging.getLogger(__name__).exception(
            "Text extraction failed for %s; writing empty text file",
            file_path,
        )
        # Write the placeholder where get_text writes its output; this also
        # truncates any partial text left behind by the failed attempt.
        with open("app/uploads/" + filename + '.txt', 'w') as text_file:
            text_file.write("")