From be739267c7aad3c907da7dfd34bf80165ff56738 Mon Sep 17 00:00:00 2001 From: nglk Date: Fri, 1 Apr 2022 11:56:47 +0200 Subject: [PATCH] unsuccessful download file --- app.py.copy | 112 +++++++++++++++++++++++++++++++++++++++++ parsepdf.py | 8 +++ templates/base.html | 3 +- templates/results.html | 2 +- 4 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 app.py.copy create mode 100644 parsepdf.py diff --git a/app.py.copy b/app.py.copy new file mode 100644 index 0000000..2eecfbb --- /dev/null +++ b/app.py.copy @@ -0,0 +1,112 @@ +import os +import random +import shutil +import string +import subprocess +from pathlib import Path +from flask import Flask, flash, redirect, render_template, request, url_for +import urllib.request +from hocrtransformpdf import * +from werkzeug.utils import secure_filename +from flask_basicauth import BasicAuth +import pdftotree +from parsepdf import hocr_result + +UPLOAD_FOLDER = 'static/uploads' +ALLOWED_EXTENSIONS = {'pdf'} + +app = Flask(__name__) + +app.config['BASIC_AUTH_USERNAME'] = 'wordmord' +app.config['BASIC_AUTH_PASSWORD'] = 'tentacles' + +basic_auth = BasicAuth(app) + +app.config['UPLOAD_FOLDER'] = "static/pdf" + + +@app.route('/', methods=['GET', 'POST']) +@basic_auth.required +def run_script(): + if request.method == 'POST': + # download PDF from url + file = request.form['file'] + if file: + response = urllib.request.urlopen(file) + file = open("static/pdf/downloaded.pdf", 'wb') + file.write(response.read()) + file.close() + #hocr = pdftotree.parse("static/pdf/downloaded.pdf") + #hocrfile=open("static/hocr/downloaded.hocr", 'wb') + #hocrfile.write(hocr.read()) + #hocrfile.close + #output = execute('python parsepdf.py') + #hocr_result = pdftotree.parse("static/pdf/downloaded.pdf") + #hocr_result=hocr_result() + +#hocr = HocrTransform(hocr_result, 300) + #hocr.to_pdf( + # out_filename='static/pdf/output-2.pdf', + #image_filename='static/images/blank.png', + #show_bounding_boxes=False, + #interword_spaces=False, + #) + + #uploadfilepath=os.path.join(app.config['UPLOAD_FOLDER'], file) + #file.save(uploadfilepath) + #return redirect(url_for('uploaded_file',file=file)) + # url = request.form['url'] + #if not url: + # flash('Url is required!') + # else: + # messages.append({'url': url}) + # return redirect(url_for('index')) + + # check if the post request has the file part + #if 'file' not in request.files: + # flash('No file part') + # return redirect(request.url) + #file = request.files['file'] + # if user does not select file, browser also + # submit an empty part without filename + #if file.filename == '': + # flash('No selected file') + # return redirect(request.url) + #if file and allowed_file(file.filename): + # filename = secure_filename(file.filename) + # uploadfilepath=os.path.join(app.config['UPLOAD_FOLDER'], filename) + # file.save(uploadfilepath) + # return redirect(url_for('uploaded_file', + # filename=filename)) + + + # hocr_result = pdftotree.parse("static/pdf/downloaded.pdf") + #hocr = HocrTransform(hocr_filename=hocr_result, dpi=300) + #hocr.to_pdf( + # out_filename='static/pdf/output-2.pdf', + # image_filename='static/images/blank.png', + # show_bounding_boxes=False, + # interword_spaces=False + #) + + #hocrfile='static/hocr/gynaikoktonia.hocr' + #hocr = HocrTransform(hocr_filename=hocrfile, dpi=300) + #hocr = HocrTransform(hocr_filename=hocr_result, dpi=300) + #hocr.to_pdf( + # out_filename='static/pdf/output.pdf', + # image_filename='static/images/blank.png', + # show_bounding_boxes=False, + # interword_spaces=False, + #) + #subprocess.check_output("python hocrtransformpdf.py -i static/images/blank.png static/hocr/downloaded.hocr static/pdf/output-2.pdf", shell=True) + return render_template('results.html',**locals()) + + +def allowed_file(filename): + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +if __name__ == "__main__": + app.run() + diff --git a/parsepdf.py b/parsepdf.py new file mode 100644 index 0000000..cd26866 --- /dev/null +++ b/parsepdf.py @@ -0,0 +1,8 @@ +#!venv/bin python3 +import pdftotree + +def hocr_result(): + hocr_result = pdftotree.parse("static/pdf/downloaded.pdf") + with open('static/hocr/downloaded.hocr', 'w') as f: + f.write(hocr_result) + return hocr_result diff --git a/templates/base.html b/templates/base.html index b0c899c..83be2b9 100644 --- a/templates/base.html +++ b/templates/base.html @@ -3,7 +3,8 @@ - + diff --git a/templates/results.html b/templates/results.html index 4887939..45695c1 100644 --- a/templates/results.html +++ b/templates/results.html @@ -6,7 +6,7 @@
-
+