Pedro Sá Couto
4 years ago
8 changed files with 47 additions and 71 deletions
Binary file not shown.
@ -1,67 +0,0 @@ |
|||||
import cv2 |
|
||||
import time |
|
||||
import logging |
|
||||
|
|
||||
# Crop every scanned page down to its first bright region.
#
# Pages are read as input1.jpg, input2.jpg, ... from the working directory
# and each cropped result is written as page1.jpg, page2.jpg, ...  The loop
# stops at the first page number whose input file cannot be read.

# A row/column counts as content once its brightest pixel exceeds this value.
threshold = 25


def _content_span(maxes, threshold):
    """Return ``(start, end)`` of the first above-threshold run in *maxes*.

    *start* is the first index whose value is greater than *threshold*
    (0 when no value qualifies).  *end* is the first later index whose
    value is lower than *threshold* (``len(maxes)`` when there is none).
    """
    start = 0
    end = len(maxes)
    found = False
    for i, value in enumerate(maxes):
        if not found:
            if value > threshold:
                start = i
                found = True
        elif value < threshold:
            end = i
            break
    return start, end


d = 1

while True:
    try:
        time.sleep(1)

        source_name = 'input%d.jpg' % d
        page = 'page%d.jpg' % d

        print("Value of d is:", d, "\n", "Page name:", source_name)
        img = cv2.imread(source_name, 0)  # load grayscale version

        # cv2.imread returns None (it does not raise) when the file is
        # missing or unreadable; that is the normal end of the page run.
        if img is None:
            print("All pages must be ready!")
            break

        # Brightest pixel of every row (axis 1) and every column (axis 0)
        # decides where the useful region starts and ends.
        hStart, hEnd = _content_span(img.max(1), threshold)
        vStart, vEnd = _content_span(img.max(0), threshold)

        # load the color image and choose only the useful area from it
        img2 = cv2.imread(source_name)[hStart:hEnd, vStart:vEnd, :]

        # write the cropped image
        cv2.imwrite(page, img2)

        d += 1
        print("Value of d is:", d)

    except Exception:
        # Unexpected failure: record the traceback and stop processing.
        logging.exception("message")
        print("All pages must be ready!")
        break
|
@ -0,0 +1,34 @@ |
|||||
|
import cv2 |
||||
|
import logging |
||||
|
|
||||
|
# Crop each rotated scan to the bounding box of its largest bright region.
#
# Reads rotated/input1.jpg, rotated/input2.jpg, ... and writes
# cropped/page1.jpg, cropped/page2.jpg, ...  The loop stops at the first
# page number whose input file cannot be read.

d = 1

while True:
    try:
        output = 'cropped/page%d.jpg' % d

        # cv2.imread returns None (it does not raise) when the file is
        # missing or unreadable; that is the normal end of the page run.
        image = cv2.imread('rotated/input%d.jpg' % d)
        if image is None:
            print("All pages must be ready!")
            break

        # Convert to grayscale and binarize with an Otsu-selected threshold
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]

        # Find the external contours of the binary mask.  findContours
        # returns 2 values on some OpenCV versions and 3 on others; the
        # contour list is picked accordingly.
        cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]

        if cnts:
            # Crop to the bounding box of the contour with the largest area
            x, y, w, h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
            ROI = image[y:y+h, x:x+w]
        else:
            # No contour found: keep the page uncropped rather than reusing
            # the previous page's crop or failing on an undefined name.
            ROI = image

        cv2.imwrite(output, ROI)

        d += 1

    except Exception:
        # Unexpected failure: record the traceback and stop processing.
        logging.exception("message")
        print("All pages must be ready!")
        break
@ -0,0 +1,7 @@ |
|||||
|
#!/bin/bash
# Merge every JPEG in ./scans into a single PDF written to scans/out.pdf.
#
# "scans" is resolved against the caller's working directory. Uncomment the
# next line to resolve it against the directory containing this script instead.
# cd "$(dirname "$0")"

# Stop if the directory is missing; otherwise convert would run on whatever
# *.jpg files happen to be in the current directory.
cd scans || exit 1
pwd
convert *.jpg out.pdf
@ -1,8 +1,10 @@ |
|||||
mkdir split |
mkdir split |
||||
mkdir rotated |
mkdir rotated |
||||
mkdir ocred |
mkdir ocred |
||||
|
mkdir cropped |
||||
|
./merge_scans.sh |
||||
python3 burstpdf.py |
python3 burstpdf.py |
||||
python3 rotation.py |
python3 rotation.py |
||||
python3 crop.py |
python3 mask_crop.py |
||||
python3 tesseract_ocr.py |
python3 tesseract_ocr.py |
||||
./merge_files.sh |
./merge_files.sh |
||||
|
Loading…
Reference in new issue