first go commit

This commit is contained in:
crunk 2022-04-25 23:31:59 +02:00
parent 5ec036679f
commit 4f35017d9d
4 changed files with 88 additions and 0 deletions

View File

@ -5,3 +5,11 @@
## `ls.go`
Lists files in [`files`](./files/). Run with `go run ls.go`.
## `gimmetxt.go`
WIP of PDF OCR using gosseract. It's far from perfect.
* gosseract doesn't OCR PDFs, only images
* tried using gographics imagick.v2 with imagemagick6
* this only does one page of the PDF, it's very slow, and it doesn't even do the OCR yet.
* imagemagick6 has a security policy when it comes to PDFs, so it's not install-and-go.

64
exp/gimmetxt/gimmetxt.go Normal file
View File

@ -0,0 +1,64 @@
package main
import (
	"fmt"
	"log"

	"gopkg.in/gographics/imagick.v2/imagick"
)
// main converts the sample PDF under ../files into out.jpeg and aborts the
// program through log.Fatal if the conversion reports an error.
func main() {
	err := ConvertPdfToJpg("../files/vanwiehuurik.pdf", "out.jpeg")
	if err != nil {
		log.Fatal(err)
	}

	// The OCR step is not wired in yet; the gosseract sketch is kept below
	// for reference.
	//client := gosseract.NewClient()
	//defer client.Close()
	//client.SetImage(file.Name())
	//text, _ := client.Text()
	//fmt.Println(text)
}
// ConvertPdfToJpg reads the PDF at pdfName with imagick, selects its first
// page, flattens the alpha channel and writes the result to imageName in
// "jpg" format.
//
// The imagick environment is initialized on entry and terminated on return.
// Any error from an imagick step is returned wrapped with a short description
// of the step that failed; a failure to select the first page is reported as
// an error as well.
func ConvertPdfToJpg(pdfName string, imageName string) error {
	// Setup
	imagick.Initialize()
	defer imagick.Terminate()

	mw := imagick.NewMagickWand()
	defer mw.Destroy()

	// Must be *before* ReadImage.
	// Make sure our image is high quality.
	if err := mw.SetResolution(300, 300); err != nil {
		return fmt.Errorf("setting resolution: %w", err)
	}

	// Load the file into imagick.
	// NOTE(review): this reads every page of the PDF even though only the
	// first one is written, which is likely why the conversion is slow.
	if err := mw.ReadImage(pdfName); err != nil {
		return fmt.Errorf("reading %q: %w", pdfName, err)
	}

	// Select the first page *before* any per-image operation: after a read
	// the wand's current image is the last one loaded, so flattening first
	// would touch the wrong page of a multi-page PDF. SetIteratorIndex
	// reports failure through its bool result, which must not be dropped.
	if !mw.SetIteratorIndex(0) {
		return fmt.Errorf("selecting first page of %q: no image at index 0", pdfName)
	}

	// Must be *after* ReadImage.
	// Flatten image and remove alpha channel, to prevent alpha turning black in jpg.
	if err := mw.SetImageAlphaChannel(imagick.ALPHA_CHANNEL_FLATTEN); err != nil {
		return fmt.Errorf("flattening alpha channel: %w", err)
	}

	// Set any compression (100 = max quality).
	if err := mw.SetCompressionQuality(95); err != nil {
		return fmt.Errorf("setting compression quality: %w", err)
	}

	// Convert into JPG.
	if err := mw.SetFormat("jpg"); err != nil {
		return fmt.Errorf("setting output format: %w", err)
	}

	// Save file.
	if err := mw.WriteImage(imageName); err != nil {
		return fmt.Errorf("writing %q: %w", imageName, err)
	}
	return nil
}

5
go.mod
View File

@ -1,3 +1,8 @@
module varia.zone/go-sh-manymanuals
go 1.18
require (
github.com/otiai10/gosseract/v2 v2.3.1 // indirect
gopkg.in/gographics/imagick.v2 v2.6.0 // indirect
)

11
go.sum Normal file
View File

@ -0,0 +1,11 @@
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs=
github.com/otiai10/gosseract/v2 v2.3.1 h1:BFy9Rru7dzqEYX7/tJuEvjVPkkJck0f+b5fYzzr6/RM=
github.com/otiai10/gosseract/v2 v2.3.1/go.mod h1:2ZOGgdTIXQzCS5f+N1HkcXRgDX6K3ZoYe3Yvo++cpp4=
github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo=
github.com/otiai10/mint v1.3.2 h1:VYWnrP5fXmz1MXvjuUvcBrXSjGE6xjON+axB/UrpO3E=
github.com/otiai10/mint v1.3.2/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc=
gopkg.in/gographics/imagick.v2 v2.6.0 h1:ewRsUQk3QkjGumERlndbFn/kTYRjyMaPY5gxwpuAhik=
gopkg.in/gographics/imagick.v2 v2.6.0/go.mod h1:/QVPLV/iKdNttRKthmDkeeGg+vdHurVEPc8zkU0XgBk=
gopkg.in/gographics/imagick.v3 v3.4.0 h1:kSnbsXOWofo81VJEn/Hw8w3qqoOrfTyWwjAQzSdtPlg=
gopkg.in/gographics/imagick.v3 v3.4.0/go.mod h1:+Q9nyA2xRZXrDyTtJ/eko+8V/5E7bWYs08ndkZp8UmA=