nglk
2 years ago
4 changed files with 123 additions and 2 deletions
@ -0,0 +1,112 @@ |
|||||
|
import os |
||||
|
import random |
||||
|
import shutil |
||||
|
import string |
||||
|
import subprocess |
||||
|
from pathlib import Path |
||||
|
from flask import Flask, flash, redirect, render_template, request, url_for |
||||
|
import urllib.request |
||||
|
from hocrtransformpdf import * |
||||
|
from werkzeug.utils import secure_filename |
||||
|
from flask_basicauth import BasicAuth |
||||
|
import pdftotree |
||||
|
from parsepdf import hocr_result |
||||
|
|
||||
|
# Module-level constants. ALLOWED_EXTENSIONS is consulted by allowed_file().
# NOTE(review): UPLOAD_FOLDER here ('static/uploads') differs from the value
# stored in app.config['UPLOAD_FOLDER'] below ("static/pdf") -- confirm which
# one is intended before relying on either.
UPLOAD_FOLDER = 'static/uploads'
ALLOWED_EXTENSIONS = {'pdf'}

app = Flask(__name__)

# HTTP Basic-Auth credentials. They can be overridden through the environment
# so that real deployments do not have to keep secrets in source control; the
# fallbacks are the values that used to be hard-coded here.
app.config['BASIC_AUTH_USERNAME'] = os.environ.get(
    'BASIC_AUTH_USERNAME', 'wordmord')
app.config['BASIC_AUTH_PASSWORD'] = os.environ.get(
    'BASIC_AUTH_PASSWORD', 'tentacles')

basic_auth = BasicAuth(app)

app.config['UPLOAD_FOLDER'] = "static/pdf"
||||
|
|
||||
|
|
||||
|
@app.route('/', methods=['GET', 'POST'])
@basic_auth.required
def run_script():
    """Serve the results page; on POST, first download the submitted PDF.

    A POST request is expected to carry a ``file`` form field holding the URL
    of a PDF. When that field is non-empty, the document is fetched and
    stored as ``static/pdf/downloaded.pdf``, overwriting any earlier download.

    Returns:
        The rendered ``results.html`` template, or a plain-text 400 response
        when the submitted URL is not an http(s) URL.
    """
    if request.method == 'POST':
        # NOTE: the local names ``file`` and ``response`` are kept on purpose.
        # Every local is forwarded to the template through ``**locals()``, so
        # renaming them would silently change the template context.
        file = request.form['file']
        if file:
            # urlopen() also accepts file:// and ftp:// URLs. The URL comes
            # straight from the request, so restrict it to web schemes to
            # keep the endpoint from reading local files.
            if not file.lower().startswith(('http://', 'https://')):
                return 'Only http:// and https:// URLs are supported.', 400
            # Context managers close both the HTTP response and the output
            # file even if the download or the write fails part-way.
            with urllib.request.urlopen(file) as response:
                with open("static/pdf/downloaded.pdf", 'wb') as file:
                    file.write(response.read())
        # TODO: convert the downloaded PDF to hOCR (parsepdf.hocr_result) and
        # render it with HocrTransform; that step is not implemented yet.
    return render_template('results.html', **locals())
||||
|
|
||||
|
|
||||
|
def allowed_file(filename):
    """Tell whether *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
||||
|
|
||||
|
|
||||
|
# Launch the Flask application only when this module is executed directly,
# not when it is imported.
if __name__ == '__main__':
    app.run()
||||
|
|
@ -0,0 +1,8 @@ |
|||||
|
#!venv/bin/python3
||||
|
import pdftotree |
||||
|
|
||||
|
def hocr_result():
    """Parse the downloaded PDF with pdftotree and save the result to disk.

    Runs ``pdftotree.parse`` on ``static/pdf/downloaded.pdf`` and writes the
    returned text to ``static/hocr/downloaded.hocr``, replacing any existing
    file. The ``static/hocr`` directory must already exist.

    Returns:
        The value returned by ``pdftotree.parse`` (presumably hOCR markup as
        a string, given the output file name -- confirm against pdftotree).
    """
    # Use a distinct local name so the function itself is not shadowed.
    markup = pdftotree.parse("static/pdf/downloaded.pdf")
    # Write as UTF-8 explicitly: the platform default encoding may be unable
    # to represent non-ASCII text extracted from the PDF.
    with open('static/hocr/downloaded.hocr', 'w', encoding='utf-8') as f:
        f.write(markup)
    return markup
Loading…
Reference in new issue