Browse Source

added the new python file

master
Cristina Cochior 6 years ago
parent
commit
f14622e940
  1. BIN
      static/files/.DS_Store
  2. 13
      static/files/00. Pushing Scores/00.json
  3. 26
      textedit.py

BIN
static/files/.DS_Store

Binary file not shown.

13
static/files/00. Pushing Scores/00.json

@ -1,13 +0,0 @@
{
"id": "00.",
"name": "Yes",
"email": "sure@whynot.com",
"friend": "Y",
"content": "A sound file.",
"files":[
"files/00. Pushing Scores/Participants.txt",
"files/00. Pushing Scores/PushingScores.html",
"files/00. Pushing Scores/events.txt"
]
}

26
textedit.py

@ -0,0 +1,26 @@
# This code is split into two parts:
# 1. go through the description HTML files and gather the interesting words in a JSON file;
# 2. go through the files again and replace every word that also appears in the JSON with an <a href> link version.
import sys, os
import nltk
from nltk import word_tokenize
from nltk.util import trigrams
# text analysis
def analysis(file):
    """Print the trigrams of the given text.

    Args:
        file: The text to analyse. Despite the name, the caller in this
            script passes the already-read contents of an HTML file (a
            string), not a path or a file object.

    Returns:
        None. The trigrams are written to standard output.
    """
    # Work on the argument itself instead of relying on a module-level
    # variable that happens to hold the same text at call time.
    # NOTE(review): given a plain string, trigrams() presumably walks it
    # character by character; tokenise first (word_tokenize is imported at
    # the top of the file) if word-level trigrams are intended -- confirm.
    # list() materialises the result so that print() shows the trigrams
    # themselves even when trigrams() hands back a lazy iterator.
    file_trigrams = list(trigrams(file))
    print(file_trigrams)
# Read every HTML file below static/files/ and hand its text to analysis().
path = "static/files/"
# Use separate loop names so the root `path` is not overwritten while walking.
for dirpath, subdirs, files in os.walk(path):
    for name in files:
        if name.endswith('html'):
            html_file = os.path.join(dirpath, name)
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform's default locale encoding.
            with open(html_file, encoding="utf-8") as f:
                content = f.read()
            analysis(content)
Loading…
Cancel
Save