added the new python file

This commit is contained in:
Cristina Cochior 2019-04-17 18:36:12 +02:00
parent 7f455de9ae
commit f14622e940
3 changed files with 26 additions and 13 deletions

BIN
static/files/.DS_Store vendored

Binary file not shown.

View File

@ -1,13 +0,0 @@
{
"id": "00.",
"name": "Yes",
"email": "sure@whynot.com",
"friend": "Y",
"content": "A sound file.",
"files":[
"files/00. Pushing Scores/Participants.txt",
"files/00. Pushing Scores/PushingScores.html",
"files/00. Pushing Scores/events.txt"
]
}

26
textedit.py Normal file
View File

@ -0,0 +1,26 @@
# This code is split into two parts:
# 1. going through the description HTML files and gathering the interesting words in a JSON file;
# 2. going through the files again to replace words that also appear in the JSON with an <a href> link.
import sys, os
import nltk
from nltk import word_tokenize
from nltk.util import trigrams
# text analysis
def analysis(file):
    """Print and return the word trigrams of a piece of text.

    Args:
        file: The text to analyse. Despite the name, the caller in this
            script passes the *contents* of an HTML file, not a path.

    Returns:
        A list of 3-tuples of consecutive tokens, in document order.
    """
    # Work on the argument, not on a module-level `content` variable, so the
    # function behaves the same no matter where it is called from.
    # Tokenise first: passing the raw string to trigrams() would produce
    # triples of single characters rather than of words.
    # NOTE(review): HTML markup is not stripped here, so tags are tokenised
    # too -- confirm whether the markup should be removed beforehand.
    tokens = word_tokenize(file)
    # trigrams() returns an iterator; materialise it so print() shows the
    # actual tuples instead of the iterator object's repr.
    file_trigrams = list(trigrams(tokens))
    print(file_trigrams)
    return file_trigrams
# Read every HTML file below static/files/ and run the text analysis on it.
path = "static/files/"
# Use a separate name for the directory currently being walked so the loop
# does not overwrite the root path it started from.
for dirpath, subdirs, files in os.walk(path):
    for name in files:
        # Require the dot so only a real ".html" extension matches.
        if name.endswith('.html'):
            file = os.path.join(dirpath, name)
            # Decode explicitly instead of relying on the platform default
            # encoding. NOTE(review): assumes the files are UTF-8 -- confirm.
            with open(file, encoding="utf-8") as f:
                content = f.read()
            analysis(content)