Browse Source

so many changes

master
Cristina Cochior 5 years ago
parent
commit
313554644b
  1. BIN
      .DS_Store
  2. BIN
      __pycache__/config.cpython-37.pyc
  3. BIN
      __pycache__/contextualise.cpython-37.pyc
  4. BIN
      __pycache__/putallhtmltexttogether.cpython-36.pyc
  5. 40821
      allhtml.txt
  6. 6
      contextualise.py
  7. 18
      generate_links.py
  8. 38
      json_actions.py
  9. 1
      mostcommon.txt
  10. 27
      putallhtmltexttogether.py
  11. BIN
      static/files/.DS_Store
  12. 0
      static/files/00/00.Participants.html
  13. 0
      static/files/00/00.PushingScores.html
  14. 0
      static/files/00/00.events.html
  15. 5
      static/files/04/04.blurb.txt
  16. 1
      static/files/04/04.motivation.txt
  17. 1
      static/files/04/04.practice.txt
  18. 2
      static/files/05/05.metadata.html
  19. 13
      static/files/17/17.blurb.html
  20. 136
      textedit.py
  21. 14
      wordlist.json
  22. 13
      wordlist_v2.json
  23. 2
      wordlist_v3.json

BIN
.DS_Store

Binary file not shown.

BIN
__pycache__/config.cpython-37.pyc

Binary file not shown.

BIN
__pycache__/contextualise.cpython-37.pyc

Binary file not shown.

BIN
__pycache__/putallhtmltexttogether.cpython-36.pyc

Binary file not shown.

40821
allhtml.txt

File diff suppressed because one or more lines are too long

6
contextualise.py

@ -36,7 +36,7 @@ pathofwords = []
pathofnumbers = []
#reading wordlist.json
with open('wordlist_v3.json', 'r') as f:
with open('wordlist.json', 'r') as f:
wordlist_dict = json.load(f)
@ -118,10 +118,6 @@ def description():
if file.lower().endswith(('.html')):
with open("static/"+file,"r", encoding='utf-8') as f:
textfile = f.read()
word = "way"
wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
textfile = re.sub(word, wordlinked, textfile)
textfile = Markup(textfile)
return render_template('description.html', datafromjson=datafromjson, itemid=itemid, textfile=textfile, idno=idno)

18
generate_links.py

@ -0,0 +1,18 @@
#!/usr/bin/env python
# Rewrites keywords found in the collection's HTML files as /diverge search links.
# NOTE(review): this span is taken from a rendered diff, so the original
# indentation was stripped; the structure below follows the visible line order.
import sys, os
import json
import re
# wordlist.json maps each keyword to its sentence records (produced by textedit.py).
with open('wordlist.json', 'r', encoding='utf-8') as f:
wordlist_dict = json.load(f)
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
with open(file, encoding='utf-8') as f:
textfile = f.read()
for word in wordlist_dict:
wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
# NOTE(review): `word` is used as a raw regex pattern (no re.escape),
# it also matches inside longer words and inside previously inserted
# <a> tags, and the transformed `textfile` is never written back in
# the visible lines -- confirm against the lines of this file not
# shown in this view.
textfile = re.sub(word, wordlinked, textfile)

38
json_actions.py

@ -1,38 +0,0 @@
import json
# # to iterate through existing json file and find the correct json file
# def find_json(id):
# get path/to/file
# return file
# #
# def save_json(id, name, email, friend, content):
# file
# data = {"id": "path/to/file", "name":,"email":,"friend":,"content":}
# with open('file.json', 'w') as f:
# json.dump(data, f)
# def jaction(original, id, name, email, friend, content):
# f = find_json_file(id)
# data = make_dict(f)
# updated = update_dict(data, name, email, friend, content)
# save_json_file(f, updated)
# # to find the file with the correct id
# def find_json_file():
# f = open('file.json', 'w')
# iterate files to find id
# return f
# # saving the json file
# def save_json_file(name, email, friend, content):
# dict= request.args.get(
# write(file, json.dump(data))

1
mostcommon.txt

@ -0,0 +1 @@
[('graphic', 540), ('sound', 510), ('Rotterdam', 480), ('nl', 480), ('music', 450), ('notation', 420), ('project', 420), ('de', 390), ('new', 360), ('The', 360), ('DE', 360), ('PLAYER', 360), ('TGC', 330), ('art', 300), ('3', 300), ('van', 270), ('performance', 270), ('Gamma', 270), ('Circulaire', 270), ('event', 240), ('Tetra', 240), ("'", 240), ('score', 210), ('release', 210), ('Kris', 210), ('2017', 180), ('artists', 180), ('scores', 180), ('Antwerp', 180), ('2.0', 180), ('George', 180), ('I', 180), ('Remco', 150), ('Bladel', 150), ('For', 150), ('publishing', 150), ('Score', 150), ('us', 150), ('XPUB', 150), ('magazine', 150), ('Media', 150), ('2018', 150), ('Paradiso', 150), ('This', 150), ('research', 150), ('Vaast', 150), ('Colson', 150), ('Art', 150), ('avant-garde', 150), ('Remörk', 150)]

27
putallhtmltexttogether.py

@ -0,0 +1,27 @@
# Concatenates every HTML file under static/files/ into allhtml.txt, then
# tokenizes that corpus and writes its 50 most frequent non-stopword tokens
# (as a str() of (token, count) pairs) to mostcommon.txt.
# NOTE(review): rendered-diff span; indentation was stripped in this view.
import sys, os
from nltk import word_tokenize
from nltk import everygrams
from nltk import FreqDist
# English stopword list inlined, extended with punctuation and HTML artefacts
# ("DOCTYPE", "html", ...) so markup tokens do not dominate the counts.
stopws = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't", ",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!"]
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
# NOTE(review): append mode without a context manager -- every rerun of
# this script duplicates the whole corpus inside allhtml.txt, which
# inflates all frequency counts below.
total = open("allhtml.txt", "a")
with open(file) as f:
content = f.read()
total.write(content)
total.close()
with open('allhtml.txt') as f:
content = f.read()
tokens = word_tokenize(content)
tokens = [token for token in tokens if token not in stopws]
freq_file=FreqDist(tokens)
# most_common(50) yields (token, count) pairs; str() of that list matches the
# format stored in mostcommon.txt.
listofwords = open("mostcommon.txt", "w+")
listofwords.write(str(freq_file.most_common(50)))
listofwords.close()

BIN
static/files/.DS_Store

Binary file not shown.

0
static/files/00/Participants.html → static/files/00/00.Participants.html

0
static/files/00/PushingScores.html → static/files/00/00.PushingScores.html

0
static/files/00/events.html → static/files/00/00.events.html

5
static/files/04/04.blurb.txt

@ -0,0 +1,5 @@
Here is a description of the event / artist / work.
For the event, it can describe what happened, where it happened, when it happened.
For the artist, it can introduce the artist.
For the work, it can describe the content.

1
static/files/04/04.motivation.txt

@ -0,0 +1 @@
Why was the artist / piece / event included in the collection.

1
static/files/04/04.practice.txt

@ -0,0 +1 @@
Here you can go into detail about an artist’s practice. For example, interviews, personal statements, reviews etc.

2
static/files/05/05.metadata.html

@ -1,4 +1,4 @@
<h1>Type of object: Event</h1>
Type of object: Event
Project:
Description: Presented at WIELS Art Book fair 2016 was this live made copy zine named "Carlson invents, Colson presents: 99 spines produced on a modified Canon IR2016 copy machine" by Vaast Colson. Produced on a 'prepared copier'. The copy machine is amplified by several internal microphones by which the sound of every run is recorded. Each copy-run of 99 copies (the maximum run of the machine) on transparent foil will be accompanied by a foil cover with the dub cut audiofile in it. The image copied in the zine is a drawing which is engraved in the glassplate of the copy machine.
Author: Vaast Colson, De Player

13
static/files/17/17.blurb.html

@ -1,10 +1,4 @@
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>
George Brecht (August 27, 1926 – December 5, 2008), born George Ellis MacDiarmid, was an American conceptual artist and avant-garde composer,
as well as a professional chemist who worked as a consultant for companies including Pfizer, Johnson & Johnson, and Mobil Oil. He was a key member
of, and influence on, Fluxus, the international group of avant-garde artists centred on George Maciunas, having been involved with the group
@ -13,15 +7,10 @@ from the first performances in Wiesbaden 1962 until Maciunas' death in 1978.
One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most
famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art. He described his own art as a way of “ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.”
<br />
<br />
<h2>SUNDOWN VEHICLE EVENT by GEORGE BRECHT</h2>
SUNDOWN VEHICLE EVENT by GEORGE BRECHT
Left: George Brecht, 1961, Two Vehicle Events, Detail of a 3.5 " by 4.5. card. One of many similar kinds of instructions that were given to participants.
Right: Vehicles, drivers & interested students gather before sunset for a performance of George Brecht's "Vehicle Sundown Event".
Location: St Vincent College, Latrobe, PA., parking area behind Sportsman's Hall, 1963.
Steve Joy took me to meet George Brecht in his studio when I was in residence at St Michael's in Manhattan (c.1962). We became friends and GB mailed instruction cards to me. I brought Steve Joy to St Vincent College when I returned to the monastery from Paris in 1963. GB agreed to provide instructions for an event at St Vincent. For his "Vehicle Sundown Event", GB published a set of about 50 cards to be given to participants who participated in the event with their vehicles. Each card held an instruction to be performed with a vehicle. Drivers were instructed to assemble at sundown in a parking lot and randomly park their vehicles. Then each driver, with a shuffled deck of instructions, would begin performing at the sound of a signal. Participants performed about 50 events such as "turn on lights", "start engine", "stop engine", "open window". This work was performed at St Vincent College under the direction of Stephen Joy with Roman Verostko assisting. c. 1963 ( I can confirm that Fr Melvin Ruprecht participated. I believe it was before I went to Washington as NCE editor, rv0
</body>
</html>

136
textedit.py

@ -1,26 +1,132 @@
# NOTE(review): this region is a rendered unified diff -- removed (old) and
# added (new) lines are interleaved and indentation was stripped, so it is not
# runnable exactly as shown. Comments below are hedged accordingly.
# this code is split in two parts:
# going through the description html files and gathering the interesting words in a json file;
# and going through the files again to replace words that also appear in the json with an a href version
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os
import nltk
from nltk import word_tokenize
from nltk.util import trigrams
# text analysis
# NOTE(review): this older analysis() appears to be a removed line from the
# diff; it also reads a name `content` that is not its parameter (`file`),
# so it could only have worked against a module-level global.
def analysis(file):
# print("yes")
file_trigrams = trigrams(content)
print(file_trigrams)
from nltk import sent_tokenize, word_tokenize
from nltk import everygrams
from nltk import FreqDist
import json
import re
"""
PART 1
We create the dictionary and save it.
"""
stopws = [",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!", "'", "<br>", "<br />", "/body", "/html", "/head", "h2", "/h2", "h1", "/h1","",""]
# reading each individual html file
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
# NOTE(review): append mode means reruns duplicate the corpus in
# allhtml.txt; `total` is opened without a context manager.
total = open("allhtml.txt", "a")
with open(file) as f:
content = f.read()
analysis(content)
total.write(content)
total.close()
keyword_list = []
# with open('allhtml.txt') as f:
# content = f.read()
# tokens = word_tokenize(content)
# tokens = [token for token in tokens if token not in stopws]
# freq_file=FreqDist(tokens)
# print(tokens)
# keyword_list.append(freq_file.most_common(50))
# print(keyword_list[0])
# Build the keyword list: every distinct non-stopword token in the corpus.
with open('allhtml.txt') as f:
content = f.read()
tokens = word_tokenize(content)
tokens = [token for token in tokens if token not in stopws]
keyword_list = list(set(tokens))
# print(tokens)
# print(keyword_list)
"""
PART 2
We iterate through the entire collection of html files, tokenize the words, and check to see whether any of them is in the keyword_list. If they are, then we generate a json file.
"""
# wordlist = {}
# avoiding_repetition = []
# Accumulator: keyword -> list of {'id': ..., 'sentence': ...} records,
# serialized to wordlist.json at the end of this section.
sentences_w_word = {}


def analysis(the_word, file_name):
    """Collect every sentence of *file_name* that contains *the_word*.

    Appends the matches to the module-level ``sentences_w_word`` dict under
    the key ``the_word``; each match is ``{'id': <two-char folder id>,
    'sentence': <cleaned sentence>}``.
    """
    # NOTE(review): assumes paths shaped like "static/files/NN/..." so that
    # characters 13-14 are the two-digit item id -- confirm for new layouts.
    doc_id = file_name[13:15]
    with open(file_name) as f:
        content = f.read()
    new_sent_tokens = []
    for sent_token in sent_tokenize(content):
        if the_word in sent_token:
            # Flatten newlines and trim stray quote/markup characters.
            new_sent_tokens.append({'id': doc_id,
                                    'sentence': sent_token.replace('\n', ' ').strip("'<>()")})
    # Bug fix: the original stored under the module-global loop variable
    # `word` instead of the `the_word` parameter; it only worked by accident
    # because the caller's loop variable happened to be named `word`.
    sentences_w_word[the_word] = sentences_w_word.get(the_word, []) + new_sent_tokens


path = "static/files/"
for path, subdirs, files in os.walk(path):
    for name in files:
        if name.endswith('html'):
            file = os.path.join(path, name)
            # O(files x keywords): every keyword is searched in every file.
            for word in keyword_list:
                analysis(word, file)

with open('wordlist.json', 'w', encoding="utf8") as outfile:
    # ensure_ascii=False keeps accented names readable in the JSON output.
    json.dump(sentences_w_word, outfile, ensure_ascii=False)
# def analysis(file, id):
# sent_tokens = sent_tokenize(file) # sentence tokenizing
# for sent_token in sent_tokens:
# tokens = word_tokenize(sent_token) # word tokenizing
# print(tokens)
# for token in tokens:
# for first in keyword_list:
# if token == first: # if token is in keyword_list
# if token not in wordlist:
# wordlist[token] = []
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# elif token not in avoiding_repetition:
# # print(wordlist[token])
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# avoiding_repetition.append(token)
# with open('static/files/17/17.blurb.html') as f:
# content = f.read()
# analysis(content, '17')
# # reading each individual html file
# path = "static/files/"
# for path, subdirs, files in os.walk(path):
# for name in files:
# if name.endswith('html'):
# file = os.path.join(path, name)
# with open(file) as f:
# content = f.read()
# id=name[:2]
# analysis(content, id)
# json_wordlist = json.dumps(wordlist)
# for item in wordlist:
# for item2 in wordlist[item]:
# print(item)
# print(item2["sentence"])
# print("\n")

14
wordlist.json

File diff suppressed because one or more lines are too long

13
wordlist_v2.json

@ -1,13 +0,0 @@
{
"way" : [
{"id": ["17", "He described his own art as a way of 'ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.'"]},
{"id": ["00", "Our ambition, and that of our collaborating partners, is to emancipate graphic notation from the confines of the modernist tradition, in such a way that it may remain an innovative and provocative medium for decades to come."]}
],
"artwork" : [
{"id": ["17", "One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art."]},
{"id": ["00", "It unfolds through a nomadic program which includes the creation of newly commissioned artworks and public events that addres scontemporary questions and issues in this particular field.",
"The discursive program for 2016–2017 will include lectures, presentations of newly commissioned artworks, concert evenings, and workshops."]}
]
}

2
wordlist_v3.json

@ -16,6 +16,8 @@
}
],
"artwork" : [
{
"id" : "17",

Loading…
Cancel
Save