|
|
@ -10,7 +10,7 @@ PART 1 |
|
|
|
We create the dictionary and save it. |
|
|
|
""" |
|
|
|
|
|
|
|
stopws = [",", ".", "?","!",":","(",")",">","<","@","#","``","/","–","''","‘","-","’", "DOCTYPE", "html", "!", "'", "<br>", "<br />", "/body", "/html", "/head", "h2", "/h2", "h1", "/h1","”","“"] |
|
|
|
stopws = [",", ".", "?","!",":","(",")",">","<","@","#","``","/","–","''","‘","-","’", "DOCTYPE", "html", "!", "'", "<br>", "<br />", "/body", "/html", "/head", "h2", "/h2", "h1", "/h1","”","“", "o", "Ca", "/"] |
|
|
|
|
|
|
|
path = "static/files/" |
|
|
|
for path, subdirs, files in os.walk(path): |
|
|
@ -55,7 +55,7 @@ def analysis(the_word, file_name): |
|
|
|
# print(re_word) |
|
|
|
# print(the_word) |
|
|
|
for sent_token in sent_tokens: |
|
|
|
if re.search(re_word, sent_token): |
|
|
|
if re.search(re_word, sent_token, re.IGNORECASE): |
|
|
|
new_sent_tokens.append({'id': id, 'sentence': sent_token.replace('\n', ' ').strip("'<>()“”")}) |
|
|
|
if the_word in sentences_w_word: # if this is not the first iteration |
|
|
|
previous_sent_tokens = sentences_w_word[the_word] |
|
|
|