24 lines
710 B
Python
24 lines
710 B
Python
|
import PyPDF2
|
||
|
|
||
|
|
||
|
def get_text(file_path, filename):
    """Write the text of every page of a PDF to ``app/uploads/<filename>.txt``.

    Args:
        file_path: Path of the PDF to read; opened in binary mode.
        filename: Base name of the output file. ``'.txt'`` is appended and
            the file is created under ``app/uploads/``.

    The pages are processed in order and their extracted text is written
    back to back, with no separator added between pages. Any error from
    opening either file or from PyPDF2 propagates to the caller.
    """
    with open(file_path, 'rb') as source:
        # The output file is opened only after the PDF opened successfully.
        with open("app/uploads/" + filename + '.txt', 'w') as sink:
            reader = PyPDF2.PdfFileReader(source)
            page_total = reader.getNumPages()
            for index in range(page_total):
                sink.write(reader.getPage(index).extractText())
|
||
|
|
||
|
|
||
|
|
||
|
def extract_text(file_path, filename):
    """Extract a PDF's text with ``get_text``; leave an empty file on failure.

    Args:
        file_path: Path of the PDF, passed through to ``get_text``.
        filename: Base name (without extension) of the output text file.

    On success the text file is whatever ``get_text`` produced. If
    ``get_text`` raises, the error is logged and an empty
    ``app/uploads/<filename>.txt`` is written instead, so the placeholder
    lands in the same location as a successful extraction and replaces any
    partially written output.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import os

    try:
        get_text(file_path, filename)
    except Exception:
        # Best-effort fallback: record why extraction failed rather than
        # swallowing it silently. KeyboardInterrupt/SystemExit still propagate.
        logging.getLogger(__name__).exception(
            "Text extraction failed for %s; writing empty placeholder",
            file_path,
        )
        # The upload directory may be the very thing that was missing.
        os.makedirs("app/uploads", exist_ok=True)
        with open("app/uploads/" + filename + '.txt', 'w') as text_file:
            text_file.write("")
|