zeroth
6 years ago
3 changed files with 26 additions and 13 deletions
Binary file not shown.
@ -1,13 +0,0 @@ |
|||||
{ |
|
||||
"id": "00.", |
|
||||
"name": "Yes", |
|
||||
"email": "sure@whynot.com", |
|
||||
"friend": "Y", |
|
||||
"content": "A sound file.", |
|
||||
"files":[ |
|
||||
"files/00. Pushing Scores/Participants.txt", |
|
||||
"files/00. Pushing Scores/PushingScores.html", |
|
||||
"files/00. Pushing Scores/events.txt" |
|
||||
|
|
||||
] |
|
||||
} |
|
@ -0,0 +1,26 @@ |
|||||
|
# this code is split in two parts: |
||||
|
# going through the description html files and gathering the interesting words in a json file; |
||||
|
# and going through the files again to replace each word that also appears in the json with an <a href> link |
||||
|
|
||||
|
import sys, os |
||||
|
import nltk |
||||
|
from nltk import word_tokenize |
||||
|
from nltk.util import trigrams |
||||
|
|
||||
|
# text analysis |
||||
|
def analysis(file):
    """Print and return the word trigrams found in a piece of text.

    Args:
        file: The text to analyse. Despite the name, the caller in this
            script passes the already-read contents of an HTML file, not
            a path or an open file handle.

    Returns:
        A list of 3-tuples of consecutive tokens (empty when the text has
        fewer than three tokens).

    Note:
        ``word_tokenize`` needs NLTK's Punkt tokenizer data to be installed
        (see ``nltk.download``); NLTK raises ``LookupError`` when it is
        missing.
    """
    # Work on the argument instead of the module-level ``content`` variable,
    # so the function no longer depends on how the caller names its globals.
    tokens = word_tokenize(file)
    # Feeding a raw string to trigrams() would produce *character* trigrams;
    # tokenising first gives word-level trigrams. trigrams() returns an
    # iterator, so materialise it to print the trigrams themselves rather
    # than the iterator object.
    file_trigrams = list(trigrams(tokens))
    print(file_trigrams)
    return file_trigrams
||||
|
|
||||
|
|
||||
|
|
||||
|
# reading each individual html file |
||||
|
# Root directory that is searched recursively for html description files.
path = "static/files/"

# Use dedicated loop names so the root ``path`` is not rebound while walking.
for dirpath, _subdirs, filenames in os.walk(path):
    for name in filenames:
        if not name.endswith('html'):
            continue
        file = os.path.join(dirpath, name)
        # Explicit encoding: do not depend on the platform's default locale.
        with open(file, encoding="utf-8") as f:
            # Keep the module-level name ``content``; other code in this
            # file may read it.
            content = f.read()
        analysis(content)
Loading…
Reference in new issue