forked from varia/go-sh-manymanuals
first go commit
This commit is contained in:
parent
5ec036679f
commit
4f35017d9d
@ -5,3 +5,11 @@
|
|||||||
## `ls.go`
|
## `ls.go`
|
||||||
|
|
||||||
Lists files in [`files`](./files/). Run with `go run ls.go`.
|
Lists files in [`files`](./files/). Run with `go run ls.go`.
|
||||||
|
|
||||||
|
## `gimmetxt.go`
|
||||||
|
|
||||||
|
WIP of gosseract PDF OCR. It's far from perfect.
|
||||||
|
* gosseract doesn't OCR PDFs, only images
|
||||||
|
* tried using gographics imagick.v2 with imagemagick6
|
||||||
|
* this only does one page of the PDF, it's very slow, and it doesn't even do the OCR yet.
|
||||||
|
* imagemagick6 has a security policy that restricts PDF handling, so it's not install-and-go.
|
||||||
|
64
exp/gimmetxt/gimmetxt.go
Normal file
64
exp/gimmetxt/gimmetxt.go
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"gopkg.in/gographics/imagick.v2/imagick"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
if err := ConvertPdfToJpg("../files/vanwiehuurik.pdf", "out.jpeg"); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
//client := gosseract.NewClient()
|
||||||
|
//defer client.Close()
|
||||||
|
|
||||||
|
//client.SetImage(file.Name())
|
||||||
|
//text, _ := client.Text()
|
||||||
|
//fmt.Println(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ConvertPdfToJpg(pdfName string, imageName string) error {
|
||||||
|
|
||||||
|
// Setup
|
||||||
|
imagick.Initialize()
|
||||||
|
defer imagick.Terminate()
|
||||||
|
|
||||||
|
mw := imagick.NewMagickWand()
|
||||||
|
defer mw.Destroy()
|
||||||
|
|
||||||
|
// Must be *before* ReadImageFile
|
||||||
|
// Make sure our image is high quality
|
||||||
|
if err := mw.SetResolution(300, 300); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load the image file into imagick
|
||||||
|
if err := mw.ReadImage(pdfName); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must be *after* ReadImageFile
|
||||||
|
// Flatten image and remove alpha channel, to prevent alpha turning black in jpg
|
||||||
|
if err := mw.SetImageAlphaChannel(imagick.ALPHA_CHANNEL_FLATTEN); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set any compression (100 = max quality)
|
||||||
|
if err := mw.SetCompressionQuality(95); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Select only first page of pdf
|
||||||
|
mw.SetIteratorIndex(0)
|
||||||
|
|
||||||
|
// Convert into JPG
|
||||||
|
if err := mw.SetFormat("jpg"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save File
|
||||||
|
return mw.WriteImage(imageName)
|
||||||
|
}
|
5
go.mod
5
go.mod
@ -1,3 +1,8 @@
|
|||||||
module varia.zone/go-sh-manymanuals
|
module varia.zone/go-sh-manymanuals
|
||||||
|
|
||||||
go 1.18
|
go 1.18
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/otiai10/gosseract/v2 v2.3.1 // indirect
|
||||||
|
gopkg.in/gographics/imagick.v2 v2.6.0 // indirect
|
||||||
|
)
|
||||||
|
11
go.sum
Normal file
11
go.sum
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
|
||||||
|
github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs=
|
||||||
|
github.com/otiai10/gosseract/v2 v2.3.1 h1:BFy9Rru7dzqEYX7/tJuEvjVPkkJck0f+b5fYzzr6/RM=
|
||||||
|
github.com/otiai10/gosseract/v2 v2.3.1/go.mod h1:2ZOGgdTIXQzCS5f+N1HkcXRgDX6K3ZoYe3Yvo++cpp4=
|
||||||
|
github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo=
|
||||||
|
github.com/otiai10/mint v1.3.2 h1:VYWnrP5fXmz1MXvjuUvcBrXSjGE6xjON+axB/UrpO3E=
|
||||||
|
github.com/otiai10/mint v1.3.2/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc=
|
||||||
|
gopkg.in/gographics/imagick.v2 v2.6.0 h1:ewRsUQk3QkjGumERlndbFn/kTYRjyMaPY5gxwpuAhik=
|
||||||
|
gopkg.in/gographics/imagick.v2 v2.6.0/go.mod h1:/QVPLV/iKdNttRKthmDkeeGg+vdHurVEPc8zkU0XgBk=
|
||||||
|
gopkg.in/gographics/imagick.v3 v3.4.0 h1:kSnbsXOWofo81VJEn/Hw8w3qqoOrfTyWwjAQzSdtPlg=
|
||||||
|
gopkg.in/gographics/imagick.v3 v3.4.0/go.mod h1:+Q9nyA2xRZXrDyTtJ/eko+8V/5E7bWYs08ndkZp8UmA=
|
Loading…
Reference in New Issue
Block a user