From 8c9ec38faf15400373f7d99db6f16bc8bce00f13 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Thu, 11 May 2023 16:08:29 +0200 Subject: [PATCH] feat: experimental scripts --- .gitignore | 2 +- exp/README.md | 7 ++++ exp/bleve.go | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ exp/go.mod | 36 +++++++++++++++++++ exp/go.sum | 67 +++++++++++++++++++++++++++++++++++ 5 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 exp/README.md create mode 100644 exp/bleve.go create mode 100644 exp/go.mod create mode 100644 exp/go.sum diff --git a/.gitignore b/.gitignore index b48ff4e..ff1be9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ /datasheets/ debug.log gshmm -/*.bleve/ +*.bleve diff --git a/exp/README.md b/exp/README.md new file mode 100644 index 0000000..5417cd5 --- /dev/null +++ b/exp/README.md @@ -0,0 +1,7 @@ +# gshmm-exp + +> Experimental programs for prototyping outside of the main `gshmm` UI + +``` +go run bleve.go +``` diff --git a/exp/bleve.go b/exp/bleve.go new file mode 100644 index 0000000..82d8cf8 --- /dev/null +++ b/exp/bleve.go @@ -0,0 +1,98 @@ +package main + +import ( + "fmt" + "log" + "os" + "path/filepath" + "strings" + + "github.com/blevesearch/bleve/v2" + pdf "github.com/johbar/go-poppler" +) + +func readPDF(name string) (string, error) { + doc, err := pdf.Open(name) + if err != nil { + return "", err + } + defer doc.Close() + + var txt string + for i := 0; i < doc.GetNPages(); i++ { + txt += doc.GetPage(i).Text() + } + + return txt, nil +} + +type datasheet struct { + filename string + filepath string + contents string +} + +func main() { + // create or open a bleve index + var index bleve.Index + var err error + mapping := bleve.NewIndexMapping() + index, err = bleve.New("test.bleve", mapping) + if err != nil { + index, err = bleve.Open("test.bleve") + if err != nil { + log.Fatal(err) + } + } + + // gather all datasheets - filename, filepath, contents + var datasheets []datasheet + if err := filepath.Walk("../datasheets", func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + name := info.Name() + if strings.HasSuffix(name, "pdf") { + contents, err := readPDF(path) + if err != nil { + return err + } + + datasheet := datasheet{ + filename: name, + filepath: path, + contents: contents, + } + + datasheets = append(datasheets, datasheet) + } + + return nil + }); err != nil { + log.Fatal(err) + } + + // index by filename and by content. no idea if this is The Way To Go + for _, datasheet := range datasheets { + contents, err := readPDF(datasheet.filepath) + if err != nil { + log.Fatal(err) + } + + if err := index.Index(datasheet.filename, contents); err != nil { + log.Fatal(err) + } + } + + // query for something! change the string to test other possibilities + query := bleve.NewMatchQuery("Enhanced-Page-Mode") + search := bleve.NewSearchRequest(query) + searchResults, err := index.Search(search) + if err != nil { + log.Fatal(err) + } + + // print out the results + fmt.Println(searchResults) +} diff --git a/exp/go.mod b/exp/go.mod new file mode 100644 index 0000000..2ed9e55 --- /dev/null +++ b/exp/go.mod @@ -0,0 +1,36 @@ +module varia.zone/gshmm-exp + +go 1.20 + +require ( + github.com/blevesearch/bleve/v2 v2.3.7 + github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c +) + +require ( + github.com/RoaringBitmap/roaring v0.9.4 // indirect + github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/blevesearch/bleve_index_api v1.0.5 // indirect + github.com/blevesearch/geo v0.1.17 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.1.4 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.0.9 // indirect + github.com/blevesearch/zapx/v11 v11.3.7 // indirect + github.com/blevesearch/zapx/v12 v12.3.7 // indirect + github.com/blevesearch/zapx/v13 v13.3.7 // indirect + github.com/blevesearch/zapx/v14 v14.3.7 // indirect + github.com/blevesearch/zapx/v15 v15.3.9 // indirect + github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect + github.com/golang/protobuf v1.3.2 // indirect + github.com/golang/snappy v0.0.1 // indirect + github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect + github.com/mschoch/smat v0.2.0 // indirect + github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef // indirect + go.etcd.io/bbolt v1.3.5 // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect +) diff --git a/exp/go.sum b/exp/go.sum new file mode 100644 index 0000000..1ea9e46 --- /dev/null +++ b/exp/go.sum @@ -0,0 +1,67 @@ +github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo= +github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= +github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= +github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/blevesearch/bleve/v2 v2.3.7 h1:nIfIrhv28tvgBpbVF8Dq7/U1zW/YiwSqg/PBgE3x8bo= +github.com/blevesearch/bleve/v2 v2.3.7/go.mod h1:2tToYD6mDeseIA13jcZiEEqYrVLg6xdk0v6+F7dWquU= +github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw= +github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= +github.com/blevesearch/geo v0.1.17 h1:AguzI6/5mHXapzB0gE9IKWo+wWPHZmXZoscHcjFgAFA= +github.com/blevesearch/geo v0.1.17/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk= +github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ= +github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= +github.com/blevesearch/zapx/v11 v11.3.7 h1:Y6yIAF/DVPiqZUA/jNgSLXmqewfzwHzuwfKyfdG+Xaw= +github.com/blevesearch/zapx/v11 v11.3.7/go.mod h1:Xk9Z69AoAWIOvWudNDMlxJDqSYGf90LS0EfnaAIvXCA= +github.com/blevesearch/zapx/v12 v12.3.7 h1:DfQ6rsmZfEK4PzzJJRXjiM6AObG02+HWvprlXQ1Y7eI= +github.com/blevesearch/zapx/v12 v12.3.7/go.mod h1:SgEtYIBGvM0mgIBn2/tQE/5SdrPXaJUaT/kVqpAPxm0= +github.com/blevesearch/zapx/v13 v13.3.7 h1:igIQg5eKmjw168I7av0Vtwedf7kHnQro/M+ubM4d2l8= +github.com/blevesearch/zapx/v13 v13.3.7/go.mod h1:yyrB4kJ0OT75UPZwT/zS+Ru0/jYKorCOOSY5dBzAy+s= +github.com/blevesearch/zapx/v14 v14.3.7 h1:gfe+fbWslDWP/evHLtp/GOvmNM3sw1BbqD7LhycBX20= +github.com/blevesearch/zapx/v14 v14.3.7/go.mod h1:9J/RbOkqZ1KSjmkOes03AkETX7hrXT0sFMpWH4ewC4w= +github.com/blevesearch/zapx/v15 v15.3.9 h1:/s9zqKxFaZKQTTcMO2b/Tup0ch5MSztlvw+frVDfIBk= +github.com/blevesearch/zapx/v15 v15.3.9/go.mod h1:m7Y6m8soYUvS7MjN9eKlz1xrLCcmqfFadmu7GhWIrLY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c h1:Tx0rgi0AplSV3N5wrZK7EC8dtNPZOGuyG+ioTp2dTn8= +github.com/johbar/go-poppler v0.0.0-20221016201615-2d089fcde50c/go.mod h1:8d4ecx/m9sNgc36I9FHwFQxQVnqPq5GXpjdlts87Sh0= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede h1:YrgBGwxMRK0Vq0WSCWFaZUnTsrA/PZE/xs1QZh+/edg= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef h1:zpOobzu7pmkVFs/XNuhJvxHCdQrGWxybcfVDabi//B4= +github.com/ungerik/go-cairo v0.0.0-20220815093914-e24bd4259cef/go.mod h1:yLTJg56omDJ+JVxZ5whpCrZgQdaSs+OBdFa+X6ViJcI= +go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= +go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=