added the new python file
This commit is contained in:
parent
7f455de9ae
commit
f14622e940
BIN
static/files/.DS_Store
vendored
BIN
static/files/.DS_Store
vendored
Binary file not shown.
@ -1,13 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "00.",
|
|
||||||
"name": "Yes",
|
|
||||||
"email": "sure@whynot.com",
|
|
||||||
"friend": "Y",
|
|
||||||
"content": "A sound file.",
|
|
||||||
"files":[
|
|
||||||
"files/00. Pushing Scores/Participants.txt",
|
|
||||||
"files/00. Pushing Scores/PushingScores.html",
|
|
||||||
"files/00. Pushing Scores/events.txt"
|
|
||||||
|
|
||||||
]
|
|
||||||
}
|
|
26
textedit.py
Normal file
26
textedit.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# this code is split in two parts:
|
||||||
|
# going through the description html files and gathering the interesting words in a json file;
|
||||||
|
# and going through the files again to replace words that also appear in the json with an <a href> link version
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
import nltk
|
||||||
|
from nltk import word_tokenize
|
||||||
|
from nltk.util import trigrams
|
||||||
|
|
||||||
|
# text analysis
|
||||||
|
def analysis(file):
    """Build trigrams from the text of one description file and print them.

    Args:
        file: The already-read text of an HTML file (the caller passes the
            string returned by ``f.read()``, not a path or a file object).
    """
    # Use the argument rather than the module-level ``content`` variable, so
    # the function no longer depends on the caller's global having that name.
    # NOTE(review): ``file`` is a plain string here, so the trigrams are
    # presumably built from characters rather than words -- confirm whether
    # the text should go through word_tokenize first.
    file_trigrams = trigrams(file)
    print(file_trigrams)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# reading each individual html file
|
||||||
|
path = "static/files/"
# Walk the whole tree under ``path``. The loop uses its own variable names so
# the root ``path`` is not overwritten by each directory that os.walk visits.
for dirpath, _subdirs, filenames in os.walk(path):
    for name in filenames:
        # Only the html description files are analysed; skip everything else.
        if not name.endswith('html'):
            continue
        file = os.path.join(dirpath, name)
        # NOTE(review): no encoding is given, so the platform default is used
        # -- confirm the html files are stored in that encoding.
        with open(file) as f:
            # ``content`` stays a module-level name because other code in this
            # file may read it.
            content = f.read()
        analysis(content)
|
Loading…
Reference in New Issue
Block a user