// Command main builds a bleve full-text index over the PDF datasheets found
// under datasheetDir and prints the results of a sample match query.
package main

import (
	"errors"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strings"

	"github.com/blevesearch/bleve/v2"
	pdf "github.com/johbar/go-poppler"
)

const (
	// indexPath is the on-disk location of the bleve index.
	indexPath = "test.bleve"
	// datasheetDir is the directory tree that is scanned for PDF files.
	datasheetDir = "../datasheets"
	// sampleQuery is the term searched for after indexing; change it to
	// test other possibilities.
	sampleQuery = "Enhanced-Page-Mode"
)

// readPDF opens the PDF at name and returns the text of all of its pages
// concatenated in page order. It returns an error if the document cannot be
// opened.
func readPDF(name string) (string, error) {
	doc, err := pdf.Open(name)
	if err != nil {
		return "", fmt.Errorf("opening PDF %q: %w", name, err)
	}
	defer doc.Close()

	// A Builder avoids the quadratic copying of repeated string concatenation
	// on documents with many pages.
	var txt strings.Builder
	pages := doc.GetNPages()
	for i := 0; i < pages; i++ {
		// NOTE(review): the page handle returned by GetPage may need to be
		// released explicitly — confirm against the go-poppler documentation.
		txt.WriteString(doc.GetPage(i).Text())
	}
	return txt.String(), nil
}

// datasheet is one PDF found on disk together with its extracted text.
type datasheet struct {
	filename string // base name of the file; used as the document ID in the index
	filepath string // path to the file as reported by the directory walk
	contents string // text extracted from every page of the PDF
}

// openIndex creates a new bleve index at path, or opens the existing one if
// an index is already present there. Any other creation failure is returned
// as-is instead of being masked by a second attempt to open the index.
func openIndex(path string) (bleve.Index, error) {
	index, err := bleve.New(path, bleve.NewIndexMapping())
	if errors.Is(err, bleve.ErrorIndexPathExists) {
		return bleve.Open(path)
	}
	return index, err
}

// gatherDatasheets walks root and returns a datasheet for every regular file
// whose extension is ".pdf" (case-insensitive). The walk stops at the first
// directory or PDF error.
func gatherDatasheets(root string) ([]datasheet, error) {
	var sheets []datasheet
	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// Skip directories (even ones named like "foo.pdf") and non-PDF files.
		if info.IsDir() || !strings.EqualFold(filepath.Ext(path), ".pdf") {
			return nil
		}
		contents, err := readPDF(path)
		if err != nil {
			return err
		}
		sheets = append(sheets, datasheet{
			filename: info.Name(),
			filepath: path,
			contents: contents,
		})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("scanning %q: %w", root, err)
	}
	return sheets, nil
}

// run performs the whole workflow: open the index, index every datasheet by
// filename, run the sample query, and print the results. It exists so that
// the deferred index close runs even on failure, which log.Fatal in main
// would otherwise skip.
func run() (err error) {
	index, err := openIndex(indexPath)
	if err != nil {
		return fmt.Errorf("opening index %q: %w", indexPath, err)
	}
	defer func() {
		// Report a close failure only if nothing else went wrong first.
		if cerr := index.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing index %q: %w", indexPath, cerr)
		}
	}()

	sheets, err := gatherDatasheets(datasheetDir)
	if err != nil {
		return err
	}

	// Index each datasheet's text under its filename. The text was already
	// extracted while gathering, so the PDF is not parsed a second time.
	for _, ds := range sheets {
		if err := index.Index(ds.filename, ds.contents); err != nil {
			return fmt.Errorf("indexing %q: %w", ds.filepath, err)
		}
	}

	// Query for something and print out the results.
	query := bleve.NewMatchQuery(sampleQuery)
	searchResults, err := index.Search(bleve.NewSearchRequest(query))
	if err != nil {
		return fmt.Errorf("searching for %q: %w", sampleQuery, err)
	}
	fmt.Println(searchResults)
	return nil
}

func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}