zeroth
6 years ago
3 changed files with 26 additions and 13 deletions
Binary file not shown.
@ -1,13 +0,0 @@ |
|||
{ |
|||
"id": "00.", |
|||
"name": "Yes", |
|||
"email": "sure@whynot.com", |
|||
"friend": "Y", |
|||
"content": "A sound file.", |
|||
"files":[ |
|||
"files/00. Pushing Scores/Participants.txt", |
|||
"files/00. Pushing Scores/PushingScores.html", |
|||
"files/00. Pushing Scores/events.txt" |
|||
|
|||
] |
|||
} |
@ -0,0 +1,26 @@ |
|||
# this code is split in two parts: |
|||
# going through the description html files and gathering the interesting words in a json file; |
|||
# and going through the files again to replace each word that also appears in the json with a linked (a href) version of it
|||
|
|||
import sys, os |
|||
import nltk |
|||
from nltk import word_tokenize |
|||
from nltk.util import trigrams |
|||
|
|||
# text analysis |
|||
def analysis(file):
    """Print and return the trigrams of a piece of text.

    Args:
        file: The text to analyse. Despite the name, the caller in this
            script passes the already-read contents of an HTML file, not
            a path or a file handle.

    Returns:
        A list of 3-tuples as produced by ``nltk.util.trigrams``.
    """
    # Read the argument rather than a module-level global, so the function
    # works for any caller and not only for the loop in this script.
    # ``trigrams`` yields lazily; materialise it so ``print`` shows the
    # actual tuples instead of the repr of an iterator object.
    # NOTE(review): when ``file`` is a raw string these are character
    # trigrams; if word trigrams are intended, tokenize first -- confirm.
    file_trigrams = list(trigrams(file))
    print(file_trigrams)
    return file_trigrams
|||
|
|||
|
|||
|
|||
# reading each individual html file |
|||
path = "static/files/"

# Use a separate name for the directory currently being visited so the
# root ``path`` above is not overwritten while walking the tree.
for dirpath, _subdirs, files in os.walk(path):
    for name in files:
        if not name.endswith('html'):
            continue
        file = os.path.join(dirpath, name)
        # Decode explicitly so reading does not depend on the platform's
        # default encoding.
        # NOTE(review): assumes the HTML files are UTF-8 -- confirm.
        with open(file, encoding="utf-8") as f:
            content = f.read()
        analysis(content)
Loading…
Reference in new issue