wip: use poppler because it's the core of pdftotext

This way we can actually read the PDF contents.
This commit is contained in:
decentral1se 2023-05-10 14:08:06 +02:00
parent fd88becf55
commit 73bd0d74f1
No known key found for this signature in database
GPG Key ID: 03789458B3D0C410
4 changed files with 18 additions and 28 deletions

View File

@ -18,6 +18,7 @@ manuals and datasheets for everything in [Varia](https://varia.zone).
## Hacking
```
sudo apt-get install libpoppler-glib-dev libcairo2-dev # for github.com/johbar/go-poppler
mkdir -p datasheets # fill with https://vvvvvvaria.org/~crunk/datasheets.zip
tail -f debug.log # in another terminal
go run goshmm.go

3
go.mod
View File

@ -6,7 +6,7 @@ require (
github.com/charmbracelet/bubbles v0.15.0
github.com/charmbracelet/bubbletea v0.23.1
github.com/charmbracelet/lipgloss v0.6.0
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80
github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c
github.com/sahilm/fuzzy v0.1.0
)
@ -23,6 +23,7 @@ require (
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.13.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef // indirect
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect

6
go.sum
View File

@ -11,10 +11,10 @@ github.com/charmbracelet/lipgloss v0.6.0 h1:1StyZB9vBSOyuZxQUcUwGr17JmojPNm87ini
github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c h1:Tx0rgi0AplSV3N5wrZK7EC8dtNPZOGuyG+ioTp2dTn8=
github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c/go.mod h1:8d4ecx/m9sNgc36I9FHwFQxQVnqPq5GXpjdlts87Sh0=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
@ -42,6 +42,8 @@ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/sahilm/fuzzy v0.1.0 h1:FzWGaw2Opqyu+794ZQ9SYifWv2EIXpwP4q8dY1kDAwI=
github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef h1:zpOobzu7pmkVFs/XNuhJvxHCdQrGWxybcfVDabi//B4=
github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef/go.mod h1:yLTJg56omDJ+JVxZ5whpCrZgQdaSs+OBdFa+X6ViJcI=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View File

@ -1,8 +1,6 @@
package main
import (
"bytes"
"errors"
"flag"
"fmt"
"log"
@ -14,7 +12,7 @@ import (
"github.com/charmbracelet/bubbles/viewport"
tea "github.com/charmbracelet/bubbletea"
"github.com/charmbracelet/lipgloss"
"github.com/ledongthuc/pdf"
pdf "github.com/johbar/go-poppler"
"github.com/sahilm/fuzzy"
)
@ -42,30 +40,18 @@ func handleCliFlags() {
}
// readPDF returns the concatenated text of every page of the PDF at name.
//
// The document is opened through the poppler binding imported as pdf and is
// closed again before the function returns. An error is returned only when
// the document cannot be opened; the returned string is empty in that case.
func readPDF(name string) (string, error) {
	doc, err := pdf.Open(name)
	if err != nil {
		// Include the path so the caller can tell which file failed; %w keeps
		// the underlying error reachable through errors.Is / errors.As.
		return "", fmt.Errorf("opening %q: %w", name, err)
	}
	defer doc.Close()

	// Ask for the page count once instead of on every loop iteration.
	numPages := doc.GetNPages()

	// Collect the text in one growing byte slice: repeated string += would
	// copy everything gathered so far for each additional page.
	var text []byte
	for i := 0; i < numPages; i++ {
		text = append(text, doc.GetPage(i).Text()...)
	}

	return string(text), nil
}
type model struct {