Pedro Sá Couto
5 years ago
8 changed files with 47 additions and 71 deletions
Binary file not shown.
@ -1,67 +0,0 @@ |
|||
import cv2 |
|||
import time |
|||
import logging |
|||
|
|||
def _content_span(maxima, threshold):
    """Return ``(start, end)`` bounds of the first bright run in *maxima*.

    ``start`` is the first index whose value is greater than *threshold*;
    ``end`` is the first later index whose value is less than *threshold*.
    When no value exceeds the threshold the full range ``(0, len(maxima))``
    is returned, so the caller keeps the whole axis instead of failing.
    """
    start, end = 0, len(maxima)
    inside = False
    for i, value in enumerate(maxima):
        if not inside:
            if value > threshold:
                start = i
                inside = True
        elif value < threshold:
            end = i
            break
    return start, end


# Crop input1.jpg, input2.jpg, ... to their bright region and write the
# results as page1.jpg, page2.jpg, ... until an input file cannot be read.
threshold = 25
d = 1

while True:
    try:
        time.sleep(1)

        source_name = 'input%d.jpg' % d
        page = 'page%d.jpg' % d

        print("Value of d is:", d, "\n", "Page name:", source_name)

        img = cv2.imread(source_name, 0)  # load grayscale version
        if img is None:
            # cv2.imread returns None (it does not raise) when the file is
            # missing or unreadable; that is the normal end of the run.
            print("All pages must be ready!")
            break

        # the indices where the useful region starts and ends, taken from
        # the per-row and per-column maxima of the grayscale image
        h_start, h_end = _content_span(img.max(1), threshold)
        v_start, v_end = _content_span(img.max(0), threshold)

        # load the color image and choose only the useful area from it
        img2 = cv2.imread(source_name)[h_start:h_end, v_start:v_end, :]

        # write the cropped image; imwrite reports failure via its return value
        if not cv2.imwrite(page, img2):
            logging.error("could not write %s", page)

        d += 1
        print("Value of d is:", d)

    except Exception:
        # Anything unexpected still stops the run, with a traceback logged.
        logging.exception("message")
        print("All pages must be ready!")
        break
@ -0,0 +1,34 @@ |
|||
import cv2 |
|||
import logging |
|||
|
|||
# Crop rotated/input1.jpg, rotated/input2.jpg, ... to the bounding box of
# their largest external contour and write the results to
# cropped/page1.jpg, cropped/page2.jpg, ... until an input cannot be read.
d = 1

while True:
    try:
        output = 'cropped/page%d.jpg' % d

        # Load image, convert to grayscale, and binarize with Otsu's threshold
        image = cv2.imread('rotated/input%d.jpg' % d)
        if image is None:
            # cv2.imread returns None (it does not raise) when the file is
            # missing or unreadable; that is the normal end of the run.
            print("All pages must be ready!")
            break

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]

        # findContours returns 2 or 3 values depending on the OpenCV version;
        # pick the contour list in either case
        cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]

        if len(cnts) > 0:
            # Only the largest contour is needed, so no full sort
            x, y, w, h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
            ROI = image[y:y+h, x:x+w]
        else:
            # No contour found: keep the whole page rather than reusing the
            # crop left over from the previous iteration
            ROI = image

        # imwrite reports failure via its return value instead of raising
        if not cv2.imwrite(output, ROI):
            logging.error("could not write %s", output)

        d += 1

    except Exception:
        # Anything unexpected still stops the run, with a traceback logged.
        logging.exception("message")
        print("All pages must be ready!")
        break
@ -0,0 +1,7 @@ |
|||
#!/bin/bash
# Combine every JPEG in ./scans into a single PDF, scans/out.pdf.
#
# The path is resolved against the caller's working directory. To resolve it
# against this script's own location instead, enable the following line:
# cd "$(dirname "$0")"

# Stop if the directory is missing; otherwise the glob below would pick up
# whatever JPEGs happen to be in the directory the script was started from.
cd scans || exit 1
pwd

convert *.jpg out.pdf
@ -1,8 +1,10 @@ |
|||
# Create the working directories used by the steps below.
# -p makes re-runs quiet when the directories already exist.
mkdir -p split rotated ocred cropped

# Run the processing steps in order.
./merge_scans.sh
python3 burstpdf.py
python3 rotation.py
python3 crop.py
python3 mask_crop.py
python3 tesseract_ocr.py
./merge_files.sh
|||
|
Loading…
Reference in new issue