2023-05-11 16:08:29 +02:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/blevesearch/bleve/v2"
|
2023-05-13 00:32:16 +02:00
|
|
|
"github.com/blevesearch/bleve/v2/analysis/lang/en"
|
|
|
|
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
|
2023-05-11 16:08:29 +02:00
|
|
|
pdf "github.com/johbar/go-poppler"
|
|
|
|
)
|
|
|
|
|
|
|
|
func readPDF(name string) (string, error) {
|
|
|
|
doc, err := pdf.Open(name)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
defer doc.Close()
|
|
|
|
|
|
|
|
var txt string
|
|
|
|
for i := 0; i < doc.GetNPages(); i++ {
|
|
|
|
txt += doc.GetPage(i).Text()
|
|
|
|
}
|
|
|
|
|
|
|
|
return txt, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// datasheet is one PDF found during the directory walk: filename is the
// file's base name, filepath is the path at which it was found, and contents
// is the text extracted from it.
type datasheet struct {
	filename, filepath, contents string
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
// create or open a bleve index
|
|
|
|
var index bleve.Index
|
|
|
|
var err error
|
2023-05-13 00:32:16 +02:00
|
|
|
idxMap := bleve.NewIndexMapping()
|
|
|
|
idxMap.DefaultAnalyzer = en.AnalyzerName
|
|
|
|
|
|
|
|
index, err = bleve.New("test.bleve", idxMap)
|
2023-05-11 16:08:29 +02:00
|
|
|
if err != nil {
|
|
|
|
index, err = bleve.Open("test.bleve")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// gather all datasheets - filename, filepath, contents
|
|
|
|
var datasheets []datasheet
|
|
|
|
if err := filepath.Walk("../datasheets", func(path string, info os.FileInfo, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
name := info.Name()
|
|
|
|
if strings.HasSuffix(name, "pdf") {
|
|
|
|
contents, err := readPDF(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
datasheet := datasheet{
|
|
|
|
filename: name,
|
|
|
|
filepath: path,
|
|
|
|
contents: contents,
|
|
|
|
}
|
|
|
|
|
|
|
|
datasheets = append(datasheets, datasheet)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// index by filename and by content. no idea if this is The Way To Go
|
|
|
|
for _, datasheet := range datasheets {
|
|
|
|
contents, err := readPDF(datasheet.filepath)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := index.Index(datasheet.filename, contents); err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// query for something! change the string to test other possibilities
|
2023-05-13 00:32:16 +02:00
|
|
|
query := bleve.NewMatchQuery("slew rate")
|
|
|
|
|
2023-05-11 16:08:29 +02:00
|
|
|
search := bleve.NewSearchRequest(query)
|
2023-05-13 00:32:16 +02:00
|
|
|
search.Highlight = bleve.NewHighlightWithStyle(ansi.Name)
|
|
|
|
search.Size = 5
|
2023-05-13 01:31:13 +02:00
|
|
|
search.Fields = []string{"*"}
|
2023-05-13 00:32:16 +02:00
|
|
|
|
2023-05-11 16:08:29 +02:00
|
|
|
searchResults, err := index.Search(search)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// print out the results
|
|
|
|
fmt.Println(searchResults)
|
|
|
|
}
|