diff --git a/textedit.py b/textedit.py index 03ef906..9773b4f 100644 --- a/textedit.py +++ b/textedit.py @@ -51,8 +51,11 @@ def analysis(the_word, file_name): content = f.read() sent_tokens = sent_tokenize(content) new_sent_tokens = [] + re_word = r"\b" + re.escape(the_word) + r"\b" + # print(re_word) + # print(the_word) for sent_token in sent_tokens: - if the_word in sent_token: + if re.search(re_word, sent_token): new_sent_tokens.append({'id': id, 'sentence': sent_token.replace('\n', ' ').strip("'<>()“”")}) if the_word in sentences_w_word: # if this is not the first iteration previous_sent_tokens = sentences_w_word[the_word]