You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

31 lines
1.1 KiB

6 years ago
import sys, os
import json
import re
# Link every known word in the site's HTML files to the /diverge search page.
#
# Reads the word list from wordlist.json (only its keys are used), walks
# static/files/ recursively, and rewrites each file whose name ends in "html"
# in place. The rewritten markup of each file is also echoed to stdout.
with open('wordlist.json', 'r', encoding="utf-8") as f:
    wordlist_dict = json.load(f)

# The capture group makes split() return the matched words (runs of word
# characters and dashes) together with the text between them, so joining the
# pieces back reproduces the file exactly apart from the substituted words.
word_pattern = re.compile(r"([\w-]+)")
link_template = "<a href='/diverge?search={}'>{}</a>"

# NOTE: the tokenizer does not tell markup from text, so a listed word that
# occurs inside a tag name, an attribute value, or an already generated link
# is wrapped as well. Running the script twice over the same files therefore
# nests links.
path = "static/files/"
for dirpath, subdirs, files in os.walk(path):
    for name in files:
        if not name.endswith('html'):
            continue
        file = os.path.join(dirpath, name)
        with open(file, 'r+', encoding="utf-8") as f:
            textfile = f.read()
            words = word_pattern.split(textfile)
            for i, word in enumerate(words):
                if word in wordlist_dict:
                    words[i] = link_template.format(word, word)
            textfile = "".join(words)
            # Rewind before writing: read() left the position at end-of-file,
            # and truncating alone does not move it, so writing without the
            # seek would pad the file with NUL bytes up to its old length.
            f.seek(0)
            f.write(textfile)
            # Drop any leftover tail if the new content is shorter.
            f.truncate()
            print(textfile)