diff --git a/exp/files/74ahc138.pdf b/exp/files/74ahc138.pdf
new file mode 100644
index 0000000..2f9f00c
Binary files /dev/null and b/exp/files/74ahc138.pdf differ
diff --git a/exp/readtxt/readtxt.go b/exp/readtxt/readtxt.go
new file mode 100644
index 0000000..c724ae2
--- /dev/null
+++ b/exp/readtxt/readtxt.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/ledongthuc/pdf"
+)
+
+// main prints the plain text extracted from the sample datasheet PDF.
+func main() {
+	pdf.DebugOn = true
+	content, err := readPdf("../files/74ahc138.pdf") // Read local pdf file
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(content)
+}
+
+// readPdf opens the PDF at path and returns its plain-text content.
+// Errors from opening the file, extracting the text, or draining the
+// text reader are returned with context.
+func readPdf(path string) (string, error) {
+	f, r, err := pdf.Open(path)
+	if err != nil {
+		return "", fmt.Errorf("opening %q: %w", path, err)
+	}
+	// Defer the close only once the open is known to have succeeded.
+	defer f.Close()
+
+	b, err := r.GetPlainText()
+	if err != nil {
+		return "", fmt.Errorf("extracting text from %q: %w", path, err)
+	}
+
+	var buf bytes.Buffer
+	if _, err := buf.ReadFrom(b); err != nil {
+		return "", fmt.Errorf("reading text from %q: %w", path, err)
+	}
+	return buf.String(), nil
+}
diff --git a/go.mod b/go.mod
index 38a324e..d1762b9 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module varia.zone/go-sh-manymanuals
 go 1.18
 
 require (
+	github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 // indirect
 	github.com/otiai10/gosseract/v2 v2.3.1 // indirect
 	gopkg.in/gographics/imagick.v2 v2.6.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 62fafba..36d8137 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
 github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
 github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs=
 github.com/otiai10/gosseract/v2 v2.3.1 h1:BFy9Rru7dzqEYX7/tJuEvjVPkkJck0f+b5fYzzr6/RM=