diff --git a/.DS_Store b/.DS_Store index a1888eb..b9d3dc6 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/bounding_box.py b/bounding_box.py old mode 100644 new mode 100755 index 0f4c481..ff55ba7 --- a/bounding_box.py +++ b/bounding_box.py @@ -29,6 +29,6 @@ while True: d+=1 except: - logging.exception("message") + # logging.exception("message") print("All pages must be ready!") break diff --git a/burstpdf.py b/burstpdf.py deleted file mode 100755 index 32e0e9c..0000000 --- a/burstpdf.py +++ /dev/null @@ -1,43 +0,0 @@ -#Based in the code in https://iq.opengenus.org/pdf_to_image_in_python/ - -import pdf2image -from PIL import Image -import time - -#DECLARE CONSTANTS -PDF_PATH = ("scans/out.pdf") -DPI = 72 -FIRST_PAGE = None -LAST_PAGE = None -FORMAT = 'jpg' -THREAD_COUNT = 1 -USERPWD = None -USE_CROPBOX = False -STRICT = False - -def pdftopil(): - #This method reads a pdf and converts it into a sequence of images - #PDF_PATH sets the path to the PDF file - #dpi parameter assists in adjusting the resolution of the image - #first_page parameter allows you to set a first page to be processed by pdftoppm - #last_page parameter allows you to set a last page to be processed by pdftoppm - #fmt parameter allows to set the format of pdftoppm conversion (PpmImageFile, TIFF) - #thread_count parameter allows you to set how many thread will be used for conversion. 
- #userpw parameter allows you to set a password to unlock the converted PDF - #use_cropbox parameter allows you to use the crop box instead of the media box when converting - #strict parameter allows you to catch pdftoppm syntax error with a custom type PDFSyntaxError - - start_time = time.time() - pil_images = pdf2image.convert_from_path(PDF_PATH, dpi=DPI, first_page=FIRST_PAGE, last_page=LAST_PAGE, fmt=FORMAT, thread_count=THREAD_COUNT, userpw=USERPWD, use_cropbox=USE_CROPBOX, strict=STRICT) - print ("Time taken : " + str(time.time() - start_time)) - return pil_images - -def save_images(pil_images): - d = 0 - for image in pil_images: - image.save(("split/input%d"%d) + ".jpg") - d += 1 - -if __name__ == "__main__": - pil_images = pdftopil() - save_images(pil_images) diff --git a/change_res.sh b/change_res.sh new file mode 100755 index 0000000..0b1dd0e --- /dev/null +++ b/change_res.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +i=0 +for img in `ls scans/*.jpg`; do + convert $img -density 72 split/input$i.jpg + i=$((i+1)); +done diff --git a/delete_and_start_over.sh b/delete_and_start_over.sh new file mode 100755 index 0000000..6eda002 --- /dev/null +++ b/delete_and_start_over.sh @@ -0,0 +1,6 @@ +rm -R scans split rotated bounding_box ocred +mkdir -p scans +mv out.pdf $(date +%F-%H:%M).pdf && touch out.pdf +sleep 2 +cp *.pdf ~/Desktop +rm *.pdf diff --git a/merge_scans.sh b/merge_scans.sh deleted file mode 100755 index 21211f9..0000000 --- a/merge_scans.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -#line 3 means here -# cd "$(dirname "$0")" - -cd scans -pwd -convert *.jpg out.pdf diff --git a/mirror_crop.py b/mirror_crop.py old mode 100644 new mode 100755 diff --git a/readme.md b/readme.md old mode 100644 new mode 100755 index a417292..ea9add1 --- a/readme.md +++ b/readme.md @@ -55,7 +55,7 @@ sudo pip3 install pdf2image Pillow opencv-python pytesseract
Make all the files executable.
```bash -sudo chmod 777 merge_scans.sh workshop_stream.sh marge_files.sh +sudo chmod 777 merge_files.sh workshop_stream.sh rename_scans.sh change_res.sh delete_and_start_over.sh ```In case you want to skip any of the scripts just comment out in the shell code, workshop_stream.sh.
@@ -116,15 +116,15 @@ mkdir bounding_box mkdir cropped ``` ###Merge the files in the directory scans -All the scans will be appended to one pdf called out.pdf
+All the scans will be renamed
```bash -./merge_scans.sh +./rename_scans.sh ``` ###Burst the pdf in scans -Burst this pdf, renaming all the files so they can be iterated later.
+Change the resolution of the scans so that they are lighter to process.
```bash -python3 burstpdf.py +./change_res.sh ``` ###Rotate the pdfs @@ -140,7 +140,7 @@ python3 bounding_box.py ``` ###Crop the mirror -The pages are now cropped, but the mirror is still visible in the middle. I commented it out because if the cameras are positioned correctly there is no need for this step.
+The pages are now cropped, but the mirror may still be visible at the edge. This happens if the cameras are not adjusted properly. I commented this step out because if the cameras are positioned correctly there is no need for it.
```bash python3 mirror_crop.py ``` @@ -156,6 +156,12 @@ python3 tesseract_ocr.py ```bash ./merge_files.sh ``` + +##START OVER +Just run delete_and_start_over.sh and start over
+```bash +./delete_and_start_over.sh +```