|
@ -10,6 +10,7 @@ from hocrtransformpdf import * |
|
|
from werkzeug.utils import secure_filename |
|
|
from werkzeug.utils import secure_filename |
|
|
from flask_basicauth import BasicAuth |
|
|
from flask_basicauth import BasicAuth |
|
|
import pdftotree |
|
|
import pdftotree |
|
|
|
|
|
import urllib.request |
|
|
|
|
|
|
|
|
UPLOAD_FOLDER = 'static/uploads' |
|
|
UPLOAD_FOLDER = 'static/uploads' |
|
|
ALLOWED_EXTENSIONS = {'pdf'} |
|
|
ALLOWED_EXTENSIONS = {'pdf'} |
|
@ -26,65 +27,26 @@ app.config['UPLOAD_FOLDER'] = "static/pdf" |
|
|
|
|
|
|
|
|
@app.route('/', methods=['GET', 'POST']) |
|
|
@app.route('/', methods=['GET', 'POST']) |
|
|
@basic_auth.required |
|
|
@basic_auth.required |
|
|
def run_script(): |
|
|
def index(): |
|
|
|
|
|
return render_template('results.html', **locals()) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/transform', methods=['POST']) |
|
|
|
|
|
@basic_auth.required |
|
|
|
|
|
def transform(): |
|
|
# The code below was written for when I was using an upload button, but now I use the input field, so the file has to be uploaded first and then transformed |
|
|
# The code below was written for when I was using an upload button, but now I use the input field, so the file has to be uploaded first and then transformed |
|
|
if request.method == 'POST': |
|
|
if request.method == 'POST': |
|
|
# download PDF from url |
|
|
content = request.get_json(silent=True) |
|
|
file = request.form['file'] |
|
|
print(content["hocr"]) |
|
|
if file: |
|
|
urllib.request.urlretrieve(content["pdf"], "static/pdf/input.pdf") |
|
|
response = urllib.request.urlopen(file) |
|
|
|
|
|
file = open("static/pdf/downloaded.pdf", 'wb') |
|
|
# The output of this hOCR doesn't render well on the PDF; its structure doesn't fit |
|
|
file.write(response.read()) |
|
|
# hocr = subprocess.call("pdftotree static/pdf/input.pdf -o static/hocr/gynaikoktonia.hocr", shell=True) |
|
|
file.close() |
|
|
|
|
|
#uploadfilepath=os.path.join(app.config['UPLOAD_FOLDER'], file) |
|
|
result = subprocess.call("python3 hocrtransformpdf.py -i static/images/blank.png static/hocr/gynaikoktonia.hocr static/pdf/result.pdf", shell=True) |
|
|
#file.save(uploadfilepath) |
|
|
|
|
|
#return redirect(url_for('uploaded_file',file=file)) |
|
|
|
|
|
# url = request.form['url'] |
|
|
|
|
|
#if not url: |
|
|
|
|
|
# flash('Url is required!') |
|
|
|
|
|
# else: |
|
|
|
|
|
# messages.append({'url': url}) |
|
|
|
|
|
# return redirect(url_for('index')) |
|
|
|
|
|
|
|
|
|
|
|
# check if the post request has the file part |
|
|
|
|
|
#if 'file' not in request.files: |
|
|
|
|
|
# flash('No file part') |
|
|
|
|
|
# return redirect(request.url) |
|
|
|
|
|
#file = request.files['file'] |
|
|
|
|
|
# if user does not select file, browser also |
|
|
|
|
|
# submit an empty part without filename |
|
|
|
|
|
#if file.filename == '': |
|
|
|
|
|
# flash('No selected file') |
|
|
|
|
|
# return redirect(request.url) |
|
|
|
|
|
#if file and allowed_file(file.filename): |
|
|
|
|
|
# filename = secure_filename(file.filename) |
|
|
|
|
|
# uploadfilepath=os.path.join(app.config['UPLOAD_FOLDER'], filename) |
|
|
|
|
|
# file.save(uploadfilepath) |
|
|
|
|
|
# return redirect(url_for('uploaded_file', |
|
|
|
|
|
# filename=filename)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hocr_result = pdftotree.parse("static/pdf/downloaded.pdf") |
|
|
|
|
|
hocr = HocrTransform(hocr_filename=hocr_result, dpi=300) |
|
|
|
|
|
hocr.to_pdf( |
|
|
|
|
|
out_filename='static/pdf/output-2.pdf', |
|
|
|
|
|
image_filename='static/images/blank.png', |
|
|
|
|
|
show_bounding_boxes=False, |
|
|
|
|
|
interword_spaces=False |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
#hocrfile='static/hocr/gynaikoktonia.hocr' |
|
|
|
|
|
#hocr = HocrTransform(hocr_filename=hocrfile, dpi=300) |
|
|
|
|
|
#hocr = HocrTransform(hocr_filename=hocr_result, dpi=300) |
|
|
|
|
|
#hocr.to_pdf( |
|
|
|
|
|
# out_filename='static/pdf/output.pdf', |
|
|
|
|
|
# image_filename='static/images/blank.png', |
|
|
|
|
|
# show_bounding_boxes=False, |
|
|
|
|
|
# interword_spaces=False, |
|
|
|
|
|
#) |
|
|
|
|
|
# result = subprocess.check_output("python3 hocrtransformpdf.py -i images/blank.png hocr/gynaikoktonia.hocr pdf/gynaikoktonia.pdf", shell=True) |
|
|
|
|
|
return render_template('results.html', **locals()) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
d = {"url":"pdf/result.pdf"} |
|
|
|
|
|
return d |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def allowed_file(filename): |
|
|
def allowed_file(filename): |
|
|