"""Index a CSV book library with Whoosh and search it from the command line.

Usage: python thisscript.py -s "search terms"
Builds a fresh full-text index from data/varlib.csv on every run, then
prints author/title for each matching row.
"""

import argparse
import csv
import os

from whoosh.index import create_in
from whoosh.fields import ID, Schema, TEXT  # explicit names instead of wildcard import
from whoosh.qparser import QueryParser

from csvparser.csvparser import getfullpublication

SCRIPT_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "data"))

# CSV columns concatenated into the searchable document body, in order.
CONTENT_FIELDS = (
    "Publication",
    "Author",
    "Fields",
    "Type",
    "Publishers",
    "Highlights",
    "Comments",
)


def index_csv_file():
    """Read data/varlib.csv and add one Whoosh document per row.

    Uses the module-level ``writer``; each row's "Id" column becomes the
    stored document title so search hits can be mapped back to the row.
    Commits the writer when all rows are added.
    """
    filename = os.path.join(DATA_DIR, "varlib.csv")
    # utf_8_sig tolerates a BOM that Excel-exported CSVs often carry
    with open(filename, 'r', encoding='utf_8_sig') as libcsv:
        for row in csv.DictReader(libcsv):
            rowcontent = concatenate_csv_row(row)
            writer.add_document(title=row["Id"], path=u"/a", content=rowcontent)
    writer.commit()


def search(searchinput):
    """Run *searchinput* against the index and print each hit.

    Prints the matching row id, then the author/title resolved via
    getfullpublication(). Prints nothing special but a notice when there
    are no hits (the original indexed results[0] and crashed on 0 hits).
    """
    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse(searchinput)
        results = searcher.search(query)
        # Bug fix: original read results[0]['title'] unconditionally,
        # raising IndexError on an empty result set.
        if not results:
            print("no results found")
            return
        for book in results:
            bookid = book['title']
            print(f"result found: {bookid}")
            publication = getfullpublication(bookid)
            print(f"{publication['Author']} - {publication['Title']}")


def concatenate_csv_row(row):
    """Join the searchable columns of one CSV row into a single string.

    :param row: dict-like mapping of column name -> cell text
    :return: space-separated concatenation of CONTENT_FIELDS values
    """
    return ' '.join(row[field] for field in CONTENT_FIELDS)


if __name__ == "__main__":
    # Guarded so importing this module no longer parses argv or
    # rebuilds the index as a side effect.
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--search", type=str)
    args = parser.parse_args()
    searchinput = args.search

    schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
    # Assigned at module scope here so index_csv_file()/search() can use them.
    ix = create_in(DATA_DIR, schema)
    writer = ix.writer()
    index_csv_file()
    print(searchinput)
    search(searchinput)