Browse Source

added a few extra 'views'

master
mb@mb 6 years ago
parent
commit
602115b755
  1. 17149
      index.json
  2. 45
      start.py
  3. 45
      tfidf.py

17149
index.json

File diff suppressed because it is too large

45
start.py

@ -10,8 +10,16 @@ def get_index():
return index return index
def get_results(query): def get_results(query):
results, index = tfidf.request_results(query) results, files = tfidf.request_results(query)
return results, index return results, files
def get_ordered():
results, files = tfidf.request_ordered()
return results, files
def get_ordered_all():
results, files = tfidf.request_ordered_all()
return results, files
# Create the application. # Create the application.
APP = flask.Flask(__name__) APP = flask.Flask(__name__)
@ -23,16 +31,47 @@ def index():
query = None query = None
results = None results = None
if request.args.get('q', ''):
query = request.args.get('q', '')
results, files = get_results(query)
return flask.render_template('results.html', query=query, results=results, files=files)
else:
index = get_index()
files = [manifesto for manifesto, _ in index.items()]
return flask.render_template('index.html', files=files)
@APP.route('/lists', methods=['GET', 'POST'])
def lists():
""" Displays the index page accessible at '/lists'
"""
query = None
results = None
if request.args.get('q', ''): if request.args.get('q', ''):
query = request.args.get('q', '') query = request.args.get('q', '')
results, index = get_results(query) results, index = get_results(query)
files = [manifesto for manifesto, _ in index.items()] files = [manifesto for manifesto, _ in index.items()]
return flask.render_template('results.html', query=query, results=results, files=files) return flask.render_template('results_lists.html', query=query, results=results, files=files)
else: else:
index = get_index() index = get_index()
files = [manifesto for manifesto, _ in index.items()] files = [manifesto for manifesto, _ in index.items()]
return flask.render_template('index.html', files=files) return flask.render_template('index.html', files=files)
@APP.route('/ordered', methods=['GET', 'POST'])
def ordered():
results, files = get_ordered()
return flask.render_template('ordered.html', files=files, results=results)
@APP.route('/ordered/all', methods=['GET', 'POST'])
def ordered_all():
results, files = get_ordered_all()
return flask.render_template('ordered.all.html', files=files, results=results)
# @APP.route('/ordered.all', methods=['GET', 'POST'])
# def ordered():
# results, files = get_ordered_all()
# return flask.render_template('ordered.all.html', files=files, results=results)
if __name__ == '__main__': if __name__ == '__main__':
if not 'index.json' in os.listdir('.'): if not 'index.json' in os.listdir('.'):
tfidf.create_index() tfidf.create_index()

45
tfidf.py

@ -1,4 +1,4 @@
import os, json import os, json, re
from math import log, exp from math import log, exp
from flask import Markup from flask import Markup
@ -146,23 +146,22 @@ def request_results(query):
# make a list of sentences that contain the query word # make a list of sentences that contain the query word
# and shape results object # and shape results object
for x, manifesto in results.items(): for x, manifesto in results.items():
sents = sentences[manifesto['name']]
value = manifesto['tfidf'] * 50000 value = manifesto['tfidf'] * 50000
result_sentences = [] result_sentences = []
count = 0 # count = 0
for s in manifesto['sentences']: for s in manifesto['sentences']:
done = 'no' done = 'no'
for word in tokenizer.tokenize(s): for word in tokenizer.tokenize(s):
if word == query: if word == query:
if count < 3: # set to include a max 3 results/manifesto in the results list # if count < 3: # set to include a max 3 results/manifesto in the results list
count += 1 # count += 1
if done is not 'yes': if done is not 'yes':
sentence = s.replace(query, '<strong style="font-size:{}%;">{}</strong>'.format(100 + value, query)) sentence = re.sub(r'[ .,;/\\*]'+query+r'[ ,.;/\\*]', '<strong style="font-size:{}%;"> {} </strong>'.format(100 + value, query), s)
html = Markup(sentence) html = Markup(sentence)
if count == 3: # if count == 3:
html = html + Markup('<div id="more">(...)<sup>*</sup></div>') # html = html + Markup('<div id="more">(...)<sup>*</sup></div>')
result_sentences.append(html) result_sentences.append(html)
done = 'yes' done = 'yes'
results[x]['sentences'] = result_sentences results[x]['sentences'] = result_sentences
print('*results returned*') print('*results returned*')
@ -178,13 +177,15 @@ def request_ordered():
results[manifesto] = words results[manifesto] = words
return results, files return results, files
# def request_ordered_all(): def request_ordered_all():
# f = open('index.json').read() f = open('index.json').read()
# index = json.loads(f) index = json.loads(f)
# files = [manifesto for manifesto, _ in index.items()] files = [manifesto for manifesto, _ in index.items()]
# results = [] results = []
# i = 0 i = 0
# for manifesto, _ in index.items(): for manifesto, _ in index.items():
# i += 1 for word, value in index[manifesto]['words'].items():
# [value, word, i] for word, value in index[manifesto]['words'].items() results.append([value, word, i])
# return results, files i += 1
results = sorted(results)
return results, files

Loading…
Cancel
Save