forked from varia/go-sh-manymanuals
first go commit
This commit is contained in:
parent
5ec036679f
commit
4f35017d9d
@ -5,3 +5,11 @@
|
||||
## `ls.go`
|
||||
|
||||
Lists files in [`files`](./files/). Run with `go run ls.go`.
|
||||
|
||||
## `gimmetxt.go`
|
||||
|
||||
WIP of gosseract PDF OCR. It's far from perfect.
|
||||
* gosseract doesn't OCR PDFs, only images
|
||||
* tried using gographics imagick.v2 with imagemagick6
|
||||
* this only does one page of the PDF; it's very slow and it doesn't even do the OCR yet.
|
||||
* imagemagick6 has a security policy when it comes to PDFs, so it's not "install and go".
|
||||
|
64
exp/gimmetxt/gimmetxt.go
Normal file
64
exp/gimmetxt/gimmetxt.go
Normal file
@ -0,0 +1,64 @@
|
||||
package main
|
||||
|
||||
import (
	"fmt"
	"log"

	"gopkg.in/gographics/imagick.v2/imagick"
)
|
||||
|
||||
func main() {
|
||||
|
||||
if err := ConvertPdfToJpg("../files/vanwiehuurik.pdf", "out.jpeg"); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
//client := gosseract.NewClient()
|
||||
//defer client.Close()
|
||||
|
||||
//client.SetImage(file.Name())
|
||||
//text, _ := client.Text()
|
||||
//fmt.Println(text)
|
||||
}
|
||||
|
||||
func ConvertPdfToJpg(pdfName string, imageName string) error {
|
||||
|
||||
// Setup
|
||||
imagick.Initialize()
|
||||
defer imagick.Terminate()
|
||||
|
||||
mw := imagick.NewMagickWand()
|
||||
defer mw.Destroy()
|
||||
|
||||
// Must be *before* ReadImageFile
|
||||
// Make sure our image is high quality
|
||||
if err := mw.SetResolution(300, 300); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Load the image file into imagick
|
||||
if err := mw.ReadImage(pdfName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Must be *after* ReadImageFile
|
||||
// Flatten image and remove alpha channel, to prevent alpha turning black in jpg
|
||||
if err := mw.SetImageAlphaChannel(imagick.ALPHA_CHANNEL_FLATTEN); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set any compression (100 = max quality)
|
||||
if err := mw.SetCompressionQuality(95); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Select only first page of pdf
|
||||
mw.SetIteratorIndex(0)
|
||||
|
||||
// Convert into JPG
|
||||
if err := mw.SetFormat("jpg"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Save File
|
||||
return mw.WriteImage(imageName)
|
||||
}
|
5
go.mod
5
go.mod
@ -1,3 +1,8 @@
|
||||
module varia.zone/go-sh-manymanuals
|
||||
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/otiai10/gosseract/v2 v2.3.1 // indirect
|
||||
gopkg.in/gographics/imagick.v2 v2.6.0 // indirect
|
||||
)
|
||||
|
11
go.sum
Normal file
11
go.sum
Normal file
@ -0,0 +1,11 @@
|
||||
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
|
||||
github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs=
|
||||
github.com/otiai10/gosseract/v2 v2.3.1 h1:BFy9Rru7dzqEYX7/tJuEvjVPkkJck0f+b5fYzzr6/RM=
|
||||
github.com/otiai10/gosseract/v2 v2.3.1/go.mod h1:2ZOGgdTIXQzCS5f+N1HkcXRgDX6K3ZoYe3Yvo++cpp4=
|
||||
github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo=
|
||||
github.com/otiai10/mint v1.3.2 h1:VYWnrP5fXmz1MXvjuUvcBrXSjGE6xjON+axB/UrpO3E=
|
||||
github.com/otiai10/mint v1.3.2/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc=
|
||||
gopkg.in/gographics/imagick.v2 v2.6.0 h1:ewRsUQk3QkjGumERlndbFn/kTYRjyMaPY5gxwpuAhik=
|
||||
gopkg.in/gographics/imagick.v2 v2.6.0/go.mod h1:/QVPLV/iKdNttRKthmDkeeGg+vdHurVEPc8zkU0XgBk=
|
||||
gopkg.in/gographics/imagick.v3 v3.4.0 h1:kSnbsXOWofo81VJEn/Hw8w3qqoOrfTyWwjAQzSdtPlg=
|
||||
gopkg.in/gographics/imagick.v3 v3.4.0/go.mod h1:+Q9nyA2xRZXrDyTtJ/eko+8V/5E7bWYs08ndkZp8UmA=
|
Loading…
Reference in New Issue
Block a user