go-sh-manymanuals/exp/bleve.go
2023-05-13 00:32:16 +02:00

107 lines
2.1 KiB
Go

package main
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
pdf "github.com/johbar/go-poppler"
)
// readPDF opens the PDF at name and returns the text of all of its pages,
// concatenated in page order with no separator between pages.
//
// It returns the error from pdf.Open unchanged when the document cannot be
// opened; in that case the returned string is empty.
func readPDF(name string) (string, error) {
	doc, err := pdf.Open(name)
	if err != nil {
		return "", err
	}
	defer doc.Close()

	// Accumulate into a builder: `+=` on a string re-copies everything read
	// so far for every page, which is quadratic on long datasheets.
	var txt strings.Builder
	for i, n := 0, doc.GetNPages(); i < n; i++ {
		txt.WriteString(doc.GetPage(i).Text())
	}
	return txt.String(), nil
}
// datasheet is one PDF found on disk together with the text extracted from it.
type datasheet struct {
	filename, filepath string // name and path of the PDF file
	contents           string // text extracted from the PDF
}
// main runs the indexing/search experiment and exits with a non-zero status
// on the first error. All work happens in run so that deferred cleanup (such
// as closing the index) executes before log.Fatal terminates the process.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// openIndex creates a bleve index at path using the English analyzer as the
// default analyzer. If creation fails (typically because the index already
// exists from an earlier run) it falls back to opening the index at path.
// When both attempts fail, the returned error reports both causes.
func openIndex(path string) (bleve.Index, error) {
	idxMap := bleve.NewIndexMapping()
	idxMap.DefaultAnalyzer = en.AnalyzerName

	index, newErr := bleve.New(path, idxMap)
	if newErr == nil {
		return index, nil
	}

	index, openErr := bleve.Open(path)
	if openErr != nil {
		return nil, fmt.Errorf("creating index %q: %v; opening it instead: %w", path, newErr, openErr)
	}
	return index, nil
}

// run opens (or creates) the on-disk index "test.bleve", indexes the text of
// every PDF found under ../datasheets, runs a fixed test query against the
// index and prints the search results to standard output.
func run() error {
	index, err := openIndex("test.bleve")
	if err != nil {
		return err
	}
	defer func() {
		// Nothing useful can be done about a failed close at this point
		// other than reporting it.
		if cerr := index.Close(); cerr != nil {
			log.Printf("closing index: %v", cerr)
		}
	}()

	// Gather all datasheets: filename, filepath and extracted contents.
	var datasheets []datasheet
	err = filepath.Walk("../datasheets", func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// Only files with a .pdf extension (any letter case) are parsed;
		// directories are never handed to the PDF reader.
		if info.IsDir() || !strings.EqualFold(filepath.Ext(path), ".pdf") {
			return nil
		}
		contents, err := readPDF(path)
		if err != nil {
			return fmt.Errorf("reading %q: %w", path, err)
		}
		datasheets = append(datasheets, datasheet{
			filename: info.Name(),
			filepath: path,
			contents: contents,
		})
		return nil
	})
	if err != nil {
		return fmt.Errorf("walking datasheets: %w", err)
	}

	// Index the already-extracted text under the file's base name. The text
	// gathered during the walk is reused, so each PDF is parsed only once.
	// NOTE(review): the document ID is the base filename, so PDFs that share
	// a name in different directories presumably replace one another.
	for _, ds := range datasheets {
		if err := index.Index(ds.filename, ds.contents); err != nil {
			return fmt.Errorf("indexing %q: %w", ds.filepath, err)
		}
	}

	// Query for something! Change the string to test other possibilities.
	query := bleve.NewMatchQuery("slew rate")
	search := bleve.NewSearchRequest(query)
	search.Highlight = bleve.NewHighlightWithStyle(ansi.Name)
	search.Size = 5
	searchResults, err := index.Search(search)
	if err != nil {
		return fmt.Errorf("searching index: %w", err)
	}

	// Print out the results.
	fmt.Println(searchResults)
	return nil
}