added the new python file
This commit is contained in:
parent
7f455de9ae
commit
f14622e940
BIN
static/files/.DS_Store
vendored
BIN
static/files/.DS_Store
vendored
Binary file not shown.
@ -1,13 +0,0 @@
|
||||
{
|
||||
"id": "00.",
|
||||
"name": "Yes",
|
||||
"email": "sure@whynot.com",
|
||||
"friend": "Y",
|
||||
"content": "A sound file.",
|
||||
"files":[
|
||||
"files/00. Pushing Scores/Participants.txt",
|
||||
"files/00. Pushing Scores/PushingScores.html",
|
||||
"files/00. Pushing Scores/events.txt"
|
||||
|
||||
]
|
||||
}
|
26
textedit.py
Normal file
26
textedit.py
Normal file
@ -0,0 +1,26 @@
|
||||
# this code is split in two parts:
|
||||
# going through the description html files and gathering the interesting words in a json file;
|
||||
# and going through the files again to replace words that also appear in the JSON with a hyperlinked (<a href>) version
|
||||
|
||||
import sys, os
|
||||
import nltk
|
||||
from nltk import word_tokenize
|
||||
from nltk.util import trigrams
|
||||
|
||||
# text analysis
|
||||
def analysis(file):
    """Build and print the trigrams of one document's text.

    Args:
        file: The text to analyse. Despite the name, the caller passes the
            already-read contents of an HTML file, not a path or file object.

    Returns:
        The list of trigrams (3-tuples of consecutive items) that was printed.
    """
    # Use the argument rather than a module-level variable, so the function
    # works no matter what the caller named its own variable.
    # NOTE(review): when given a plain string, the trigrams are built over
    # single characters; tokenize first if word trigrams are wanted - confirm
    # the intended granularity.
    # Materialize the result so the print below shows the trigrams themselves
    # even when ``trigrams`` hands back a lazy generator.
    file_trigrams = list(trigrams(file))
    print(file_trigrams)
    return file_trigrams
|
||||
|
||||
|
||||
|
||||
# reading each individual html file
|
||||
# Root folder that holds one sub-folder per submission.
path = "static/files/"

# ``dirpath`` is kept separate from ``path`` so the root is not overwritten
# while the tree is being walked.
for dirpath, subdirs, files in os.walk(path):
    for name in files:
        # Match on the real extension, not just on names ending in "html".
        if not name.endswith('.html'):
            continue
        file = os.path.join(dirpath, name)
        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(file, encoding="utf-8") as f:
            content = f.read()
        analysis(content)
|
Loading…
Reference in New Issue
Block a user