You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
22 lines
476 B
22 lines
476 B
# import libraries
|
|
from PIL import Image
|
|
import pytesseract
|
|
import time
|
|
|
|
# OCR every page image in turn: read "split/page<i>.jpg" starting at i = 1,
# convert it to a PDF with pytesseract, and write the result to
# "ocred/page<i>.pdf". The loop ends at the first page number whose input
# image does not exist.
i = 1

while True:
    # Only the open of the input image is guarded: a missing file is the
    # expected end-of-input signal. Any other failure (OCR error, missing
    # output directory, Ctrl-C) is a real problem and is allowed to propagate
    # instead of being reported as "All pages must be ready!".
    try:
        img = Image.open("split/page%i.jpg" % i)
    except FileNotFoundError:
        print("All pages must be ready!")
        break

    # Close the image as soon as OCR is done so file handles do not pile up
    # over a long run.
    with img:
        print(img)
        pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf')

    # NOTE(review): the reason for this one-second pause is not evident from
    # the script; kept as in the original.
    time.sleep(1)

    # The OCR result is written as-is in binary mode; the context manager
    # guarantees the file is closed even if the write fails.
    with open("ocred/page%i.pdf" % i, "wb") as out_file:
        out_file.write(pdf)

    i += 1
    # Printed after the increment, so this shows the NEXT page number to be
    # attempted, not the one just written.
    print(i)
|
|
|