The tool relates to the project "No Annotation* is a Alone", a series of annotative interventions upon the rigidity of PDF, to challenge established protocols.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

94 lines
3.1 KiB

2 years ago
import os
import random
import shutil
import string
import subprocess
from pathlib import Path
from flask import Flask, flash, redirect, render_template, request, url_for
from hocrtransformpdf import *
from werkzeug.utils import secure_filename
from flask_basicauth import BasicAuth
import pdftotree
# Upload settings in the style of Flask's file-upload example.
# NOTE(review): this constant is not what the app reads at runtime;
# app.config['UPLOAD_FOLDER'] is set to "static/pdf" further down.
# Confirm which directory is intended and drop the other.
UPLOAD_FOLDER = 'static/uploads'
ALLOWED_EXTENSIONS = {'pdf'}

app = Flask(__name__)

# HTTP Basic Auth credentials. They can be overridden through environment
# variables of the same name so real secrets need not live in the source;
# the previous hard-coded values remain as fallbacks so existing
# deployments behave exactly as before.
app.config['BASIC_AUTH_USERNAME'] = os.environ.get(
    'BASIC_AUTH_USERNAME', 'wordmord')
app.config['BASIC_AUTH_PASSWORD'] = os.environ.get(
    'BASIC_AUTH_PASSWORD', 'tentacles')
basic_auth = BasicAuth(app)

# Directory where uploaded PDFs are stored by the upload route.
app.config['UPLOAD_FOLDER'] = "static/pdf"
2 years ago
@app.route('/', methods=['GET', 'POST'])
@basic_auth.required
def run_script():
    """Accept a PDF upload and re-render it through pdftotree and HocrTransform.

    GET renders ``results.html`` without any conversion data.

    POST expects a multipart form with a ``file`` part. The upload is saved
    under ``app.config['UPLOAD_FOLDER']``, parsed with ``pdftotree.parse``,
    and the result is passed to ``HocrTransform``, whose ``to_pdf`` is asked
    to write ``static/pdf/output-2.pdf``.

    Returns:
        The rendered ``results.html`` page, or a plain-text response with
        status 400 when the POST does not carry a usable PDF upload.
    """
    # Only the names that were actually produced are handed to the template,
    # so a plain GET sees them as undefined rather than as ``None``.
    context = {}

    if request.method == 'POST':
        # Uploaded files live in request.files; request.form only carries
        # text fields, and a str has no .save() method.
        upload = request.files.get('file')
        if upload is None or not upload.filename:
            return 'No selected file', 400

        # Sanitise the client-supplied name before using it in a path, then
        # check the extension on the sanitised result (sanitising can strip
        # the name down to nothing or remove the dot).
        filename = secure_filename(upload.filename)
        if not allowed_file(filename):
            return 'Only PDF files are accepted', 400

        uploadfilepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        upload.save(uploadfilepath)

        hocr_result = pdftotree.parse(uploadfilepath)
        app.logger.info("test")
        # NOTE(review): pdftotree.parse() is called without an output path,
        # so hocr_result is presumably the markup itself, while the keyword
        # name ``hocr_filename`` suggests a path on disk is expected.
        # Confirm against hocrtransformpdf before relying on this step.
        hocr = HocrTransform(hocr_filename=hocr_result, dpi=300)
        hocr.to_pdf(
            out_filename='static/pdf/output-2.pdf',
            image_filename='static/images/blank.png',
            show_bounding_boxes=False,
            interword_spaces=False,
        )

        # Same variable names the template previously received through
        # ``**locals()``; ``file`` is the stored filename.
        context = {
            'file': filename,
            'uploadfilepath': uploadfilepath,
            'hocr_result': hocr_result,
            'hocr': hocr,
        }

    return render_template('results.html', **context)
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name without any dot is never allowed.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot anywhere in the name, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
# Start the Flask server via app.run() only when this file is executed
# directly; importing the module does not start it.
if __name__ == "__main__":
    app.run()