2019-05-04 16:27:50 +02:00
|
|
|
import sys, os
|
|
|
|
import json
|
|
|
|
import re
|
|
|
|
|
2019-05-21 11:54:11 +02:00
|
|
|
# Load the word list. Only its keys are used below, as the set of words that
# should be turned into search links.
with open('wordlist.json', 'r', encoding="utf-8") as wordlist_file:
    wordlist_dict = json.load(wordlist_file)

# Root folder that is walked recursively; every file whose name ends in
# 'html' is rewritten in place.
path = "static/files/"

# Splitting on a capturing group keeps both the matched tokens and the text
# between them, so "".join(tokens) reproduces the document exactly.
_TOKEN_PATTERN = re.compile(r"([\w-]+)")
_LINK_TEMPLATE = "<a href='/diverge?search={}'>{}</a>"


def _link_words(text, linkable):
    """Return *text* with every token contained in *linkable* wrapped in a link.

    The text is split into runs of word characters / hyphens and the pieces
    in between; each piece that is a member of *linkable* is replaced by an
    anchor pointing at ``/diverge?search=<piece>``, everything else is kept
    verbatim.

    NOTE(review): tokens are matched regardless of where they occur, so a
    listed word inside an HTML tag or attribute is wrapped as well, and
    running the script again on an already processed file wraps the words
    inside the previously inserted anchors a second time.
    """
    return "".join(
        _LINK_TEMPLATE.format(piece, piece) if piece in linkable else piece
        for piece in _TOKEN_PATTERN.split(text)
    )


# Dict key views support O(1) membership tests; build the view once.
words_to_search = wordlist_dict.keys()

for dirpath, subdirs, files in os.walk(path):
    for name in files:
        if not name.endswith('html'):
            continue

        html_path = os.path.join(dirpath, name)
        with open(html_path, 'r+', encoding="utf-8") as html_file:
            textfile = _link_words(html_file.read(), words_to_search)

            # After read() the stream position is at the end of the file and
            # truncate() does not move it, so rewind before writing;
            # otherwise the new content is written after a gap of NUL bytes.
            html_file.seek(0)
            html_file.write(textfile)
            # Drop whatever is left of the old content past the new end.
            html_file.truncate()

        print(textfile)
|