From 73bd0d74f1b983fd49b4b4f3a7fc11062aec5a15 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Wed, 10 May 2023 14:08:06 +0200 Subject: [PATCH] wip: use poppler because it's the core of pdftotext This way we can actually read the PDF contents. --- README.md | 3 ++- go.mod | 3 ++- go.sum | 6 ++++-- gshmm.go | 30 ++++++++---------------------- 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 3d7e708..51c7086 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,8 @@ manuals and datasheets for everything in [Varia](https://varia.zone). ## Hacking ``` +sudo apt-get install libpoppler-glib-dev libcairo2-dev # for github.com/johbar/go-poppler mkdir -p datasheets # fill with https://vvvvvvaria.org/~crunk/datasheets.zip -tail -f debug.log # in another terminal +tail -f debug.log # in another terminal go run goshmm.go ``` diff --git a/go.mod b/go.mod index 2b1f10c..4133c97 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/charmbracelet/bubbles v0.15.0 github.com/charmbracelet/bubbletea v0.23.1 github.com/charmbracelet/lipgloss v0.6.0 - github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 + github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c github.com/sahilm/fuzzy v0.1.0 ) @@ -23,6 +23,7 @@ require ( github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.13.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect + github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef // indirect golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/text v0.3.7 // indirect diff --git a/go.sum b/go.sum index b443eaf..1580715 100644 --- a/go.sum +++ b/go.sum @@ -11,10 +11,10 @@ github.com/charmbracelet/lipgloss v0.6.0 h1:1StyZB9vBSOyuZxQUcUwGr17JmojPNm87ini github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk= github.com/containerd/console v1.0.3 
h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c h1:Tx0rgi0AplSV3N5wrZK7EC8dtNPZOGuyG+ioTp2dTn8= +github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c/go.mod h1:8d4ecx/m9sNgc36I9FHwFQxQVnqPq5GXpjdlts87Sh0= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= -github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= @@ -42,6 +42,8 @@ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/sahilm/fuzzy v0.1.0 h1:FzWGaw2Opqyu+794ZQ9SYifWv2EIXpwP4q8dY1kDAwI= github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= +github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef h1:zpOobzu7pmkVFs/XNuhJvxHCdQrGWxybcfVDabi//B4= +github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef/go.mod h1:yLTJg56omDJ+JVxZ5whpCrZgQdaSs+OBdFa+X6ViJcI= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/gshmm.go b/gshmm.go index 
ee2218a..2ec5276 100644 --- a/gshmm.go +++ b/gshmm.go @@ -1,8 +1,6 @@ package main import ( - "bytes" - "errors" "flag" "fmt" "log" @@ -14,7 +12,7 @@ import ( "github.com/charmbracelet/bubbles/viewport" tea "github.com/charmbracelet/bubbletea" "github.com/charmbracelet/lipgloss" - "github.com/ledongthuc/pdf" + pdf "github.com/johbar/go-poppler" "github.com/sahilm/fuzzy" ) @@ -42,30 +40,18 @@ func handleCliFlags() { } func readPDF(name string) (string, error) { - file, reader, err := pdf.Open(name) + doc, err := pdf.Open(name) if err != nil { - return "", errors.Unwrap(err) + return "", err } + defer doc.Close() - defer func() { - if e := file.Close(); e != nil { - err = e - } - }() - - buf := new(bytes.Buffer) - buffer, err := reader.GetPlainText() - - if err != nil { - return "", errors.Unwrap(err) - } - - _, err = buf.ReadFrom(buffer) - if err != nil { - return "", errors.Unwrap(err) + var txt string + for i := 0; i < doc.GetNPages(); i++ { + txt += doc.GetPage(i).Text() } - return buf.String(), nil + return txt, nil } type model struct {