The tool relates to the project "No Annotation* is a Alone"; a series of annotative interventions upon the rigidity of PDF, to challenge established protocols.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

112 lines
3.7 KiB

import os
import random
import shutil
import string
import subprocess
from pathlib import Path
from flask import Flask, flash, redirect, render_template, request, url_for
import urllib.request
from hocrtransformpdf import *
from werkzeug.utils import secure_filename
from flask_basicauth import BasicAuth
import pdftotree
from parsepdf import hocr_result
# Module-level application setup: constants, the Flask app object and the
# HTTP basic-auth wrapper used to protect the routes below.

# NOTE(review): this constant says 'static/uploads', but
# app.config['UPLOAD_FOLDER'] further down is set to "static/pdf". Both values
# are kept exactly as they were; confirm which directory is intended.
UPLOAD_FOLDER = 'static/uploads'

# File extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'pdf'}

app = Flask(__name__)

# Basic-auth credentials. Environment variables of the same name take
# precedence so deployments do not have to keep secrets in source control;
# the previously hard-coded values remain as fallbacks so existing setups
# keep working unchanged.
app.config['BASIC_AUTH_USERNAME'] = os.environ.get(
    'BASIC_AUTH_USERNAME', 'wordmord')
app.config['BASIC_AUTH_PASSWORD'] = os.environ.get(
    'BASIC_AUTH_PASSWORD', 'tentacles')
basic_auth = BasicAuth(app)

app.config['UPLOAD_FOLDER'] = "static/pdf"
@app.route('/', methods=['GET', 'POST'])
@basic_auth.required
def run_script():
    """Render the results page, downloading a PDF first when one is requested.

    On POST the ``file`` form field is read as the URL of a PDF. When it is
    non-empty, the document is fetched and written to
    ``static/pdf/downloaded.pdf``, overwriting any earlier download. GET
    requests, and POSTs with an empty ``file`` field, skip the download.

    Returns:
        The rendered ``results.html`` template, or a plain-text 400 response
        when the submitted URL does not use the ``http`` or ``https`` scheme.
    """
    if request.method == 'POST':
        # NOTE: the local names `file` and `response` are kept deliberately.
        # render_template() below is called with **locals(), so these names
        # are part of the context that results.html receives.
        file = request.form['file']
        if file:
            # urlopen() also accepts file:// and ftp:// URLs; without this
            # check a caller could make the server copy its own local files.
            # This does not stop requests to internal http(s) hosts.
            if urllib.parse.urlsplit(file).scheme not in ('http', 'https'):
                return 'Only http:// and https:// URLs are supported.', 400
            # The timeout keeps an unresponsive remote host from blocking the
            # request indefinitely.
            with urllib.request.urlopen(file, timeout=30) as response:
                with open("static/pdf/downloaded.pdf", 'wb') as file:
                    # Stream in chunks instead of holding the whole PDF in
                    # memory at once.
                    shutil.copyfileobj(response, file)
    # TODO: the hOCR step (pdftotree.parse -> HocrTransform.to_pdf) and the
    # direct file-upload path only existed here as commented-out experiments
    # and are not wired in; the dead code was removed for readability.
    return render_template('results.html', **locals())
def allowed_file(filename):
    """Tell whether *filename* carries an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
# Start the Flask server with its default settings only when this module is
# executed directly, not when it is imported.
if __name__ == '__main__':
    app.run()