"""Rewrite HTML files in place, substituting tokens found in a JSON wordlist.

The wordlist is loaded from ``wordlist.json``; only its keys are used. Every
file below the HTML root whose name ends in ``html`` is read, split into
tokens, passed through the replacement template for each token that is a
wordlist key, written back to the same file, and echoed to stdout.
"""
import json
import os
import re
import sys

# JSON file whose keys are the words to look for.
WORDLIST_PATH = "wordlist.json"

# Directory tree that is searched for HTML files.
HTML_ROOT = "static/files/"

# The capture group makes ``split`` keep the matched tokens in its result, so
# joining the pieces back together reproduces the input text exactly.
WORD_PATTERN = re.compile(r"([\w-]+)")


def _annotate_words(text, words_to_search):
    """Return *text* with every token found in *words_to_search* replaced.

    The text is split on runs of word characters and hyphens; both the runs
    and the separators between them are checked against *words_to_search*.
    """
    tokens = WORD_PATTERN.split(text)
    for index, token in enumerate(tokens):
        if token in words_to_search:
            # NOTE(review): this template has a single placeholder, so the
            # replacement equals the token itself and the second argument is
            # unused. Presumably markup around the word was intended here --
            # confirm and restore the real template.
            tokens[index] = "{}".format(token, token)
    return "".join(tokens)


def _rewrite_html_file(file_path, words_to_search):
    """Apply the word replacement to one file in place and print the result."""
    with open(file_path, "r+", encoding="utf-8") as html_file:
        text = _annotate_words(html_file.read(), words_to_search)
        # Reading leaves the stream positioned at end-of-file, and truncating
        # does not move it. Rewind first so the new content starts at offset
        # zero instead of after a run of NUL padding, then cut off whatever
        # is left of the old content.
        html_file.seek(0)
        html_file.write(text)
        html_file.truncate()
    print(text)


def main(root=HTML_ROOT, wordlist_path=WORDLIST_PATH):
    """Process every file under *root* whose name ends in ``html``.

    Args:
        root: Directory to walk recursively.
        wordlist_path: Path of the JSON wordlist; its keys are the words
            that get replaced.
    """
    with open(wordlist_path, "r", encoding="utf-8") as wordlist_file:
        wordlist_dict = json.load(wordlist_file)
    words_to_search = wordlist_dict.keys()

    for dirpath, _subdirs, filenames in os.walk(root):
        for name in filenames:
            if not name.endswith("html"):
                continue
            _rewrite_html_file(os.path.join(dirpath, name), words_to_search)


if __name__ == "__main__":
    main()