Browse Source

working

master
Pedro Sá Couto 5 years ago
parent
commit
27c24672c9
  1. BIN
      .DS_Store
  2. 2
      burstpdf.py
  3. 4
      mirror_crop.py
  4. 5
      remove.sh
  5. BIN
      scans/.DS_Store
  6. 2
      tesseract_ocr.py
  7. 13
      workshop_stream.sh

BIN
.DS_Store

Binary file not shown.

2
burstpdf.py

@ -33,7 +33,7 @@ def pdftopil():
return pil_images return pil_images
def save_images(pil_images): def save_images(pil_images):
d = 1 d = 0
for image in pil_images: for image in pil_images:
image.save(("split/input%d"%d) + ".jpg") image.save(("split/input%d"%d) + ".jpg")
d += 1 d += 1

4
mirror_crop.py

@ -14,7 +14,7 @@ while True:
print("cropping even") print("cropping even")
# left, up, right, bottom # left, up, right, bottom
border = (0, 0, 68, 0) border = (0, 0, 65, 0)
finalpage = ImageOps.crop(page, border) finalpage = ImageOps.crop(page, border)
finalpage.save('cropped/page%i.jpg'%i) finalpage.save('cropped/page%i.jpg'%i)
@ -23,7 +23,7 @@ while True:
print("cropping odd") print("cropping odd")
# left, up, right, bottom # left, up, right, bottom
border = (68, 0, 0, 0) border = (65, 0, 0, 0)
finalpage = ImageOps.crop(page, border) finalpage = ImageOps.crop(page, border)
finalpage.save('cropped/page%i.jpg'%i) finalpage.save('cropped/page%i.jpg'%i)

5
remove.sh

@ -0,0 +1,5 @@
cd split
pwd
rm page0.jpg
rm -ltr | tail -1
rm .DS_Store

BIN
scans/.DS_Store

Binary file not shown.

2
tesseract_ocr.py

@ -7,7 +7,7 @@ i = 1
while True: while True:
try: try:
img = Image.open("cropped/page%i.jpg"%i) img = Image.open("bounding_box/input%i.jpg"%i)
print(img) print(img)
pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf') pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf')
time.sleep(1) time.sleep(1)

13
workshop_stream.sh

@ -1,12 +1,13 @@
mkdir split
mkdir rotated
mkdir ocred
mkdir bounding_box
mkdir cropped
./merge_scans.sh ./merge_scans.sh
mkdir split
python3 burstpdf.py python3 burstpdf.py
mkdir rotated
python3 rotation.py python3 rotation.py
mkdir bounding_box
python3 bounding_box.py python3 bounding_box.py
python3 mirror_crop.py mkdir cropped
# python3 mirror_crop.py
mkdir ocred
python3 tesseract_ocr.py python3 tesseract_ocr.py
./remove.sh
./merge_files.sh ./merge_files.sh

Loading…
Cancel
Save