Browse Source

so many changes

master
Cristina Cochior 5 years ago
parent
commit
313554644b
  1. BIN
      .DS_Store
  2. BIN
      __pycache__/config.cpython-37.pyc
  3. BIN
      __pycache__/contextualise.cpython-37.pyc
  4. BIN
      __pycache__/putallhtmltexttogether.cpython-36.pyc
  5. 40821
      allhtml.txt
  6. 6
      contextualise.py
  7. 18
      generate_links.py
  8. 38
      json_actions.py
  9. 1
      mostcommon.txt
  10. 27
      putallhtmltexttogether.py
  11. BIN
      static/files/.DS_Store
  12. 0
      static/files/00/00.Participants.html
  13. 0
      static/files/00/00.PushingScores.html
  14. 0
      static/files/00/00.events.html
  15. 5
      static/files/04/04.blurb.txt
  16. 1
      static/files/04/04.motivation.txt
  17. 1
      static/files/04/04.practice.txt
  18. 2
      static/files/05/05.metadata.html
  19. 13
      static/files/17/17.blurb.html
  20. 136
      textedit.py
  21. 14
      wordlist.json
  22. 13
      wordlist_v2.json
  23. 2
      wordlist_v3.json

BIN
.DS_Store

Binary file not shown.

BIN
__pycache__/config.cpython-37.pyc

Binary file not shown.

BIN
__pycache__/contextualise.cpython-37.pyc

Binary file not shown.

BIN
__pycache__/putallhtmltexttogether.cpython-36.pyc

Binary file not shown.

40821
allhtml.txt

File diff suppressed because one or more lines are too long

6
contextualise.py

@ -36,7 +36,7 @@ pathofwords = []
pathofnumbers = []
#reading wordlist.json
with open('wordlist_v3.json', 'r') as f:
with open('wordlist.json', 'r') as f:
wordlist_dict = json.load(f)
@ -118,10 +118,6 @@ def description():
if file.lower().endswith(('.html')):
with open("static/"+file,"r", encoding='utf-8') as f:
textfile = f.read()
word = "way"
wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
textfile = re.sub(word, wordlinked, textfile)
textfile = Markup(textfile)
return render_template('description.html', datafromjson=datafromjson, itemid=itemid, textfile=textfile, idno=idno)

18
generate_links.py

@ -0,0 +1,18 @@
#!/usr/bin/env python
# Rewrites keywords found in the collection's HTML files as /diverge search links.
# NOTE(review): this span is taken from a rendered diff, so the original
# indentation was stripped; the structure below follows the visible line order.
import sys, os
import json
import re
# wordlist.json maps each keyword to its sentence records (produced by textedit.py).
with open('wordlist.json', 'r', encoding='utf-8') as f:
wordlist_dict = json.load(f)
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
with open(file, encoding='utf-8') as f:
textfile = f.read()
for word in wordlist_dict:
wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
# NOTE(review): `word` is used as a raw regex pattern (no re.escape),
# it also matches inside longer words and inside previously inserted
# <a> tags, and the transformed `textfile` is never written back in
# the visible lines -- confirm against the lines of this file not
# shown in this view.
textfile = re.sub(word, wordlinked, textfile)

38
json_actions.py

@ -1,38 +0,0 @@
import json
# # to iterate through existing json file and find the correct json file
# def find_json(id):
# get path/to/file
# return file
# #
# def save_json(id, name, email, friend, content):
# file
# data = {"id": "path/to/file", "name":,"email":,"friend":,"content":}
# with open('file.json', 'w') as f:
# json.dump(data, f)
# def jaction(original, id, name, email, friend, content):
# f = find_json_file(id)
# data = make_dict(f)
# updated = update_dict(data, name, email, friend, content)
# save_json_file(f, updated)
# # to find the file with the correct id
# def find_json_file():
# f = open('file.json', 'w')
# iterate files to find id
# return f
# # saving the json file
# def save_json_file(name, email, friend, content):
# dict= request.args.get(
# write(file, json.dump(data))

1
mostcommon.txt

@ -0,0 +1 @@
[('graphic', 540), ('sound', 510), ('Rotterdam', 480), ('nl', 480), ('music', 450), ('notation', 420), ('project', 420), ('de', 390), ('new', 360), ('The', 360), ('DE', 360), ('PLAYER', 360), ('TGC', 330), ('art', 300), ('3', 300), ('van', 270), ('performance', 270), ('Gamma', 270), ('Circulaire', 270), ('event', 240), ('Tetra', 240), ("'", 240), ('score', 210), ('release', 210), ('Kris', 210), ('2017', 180), ('artists', 180), ('scores', 180), ('Antwerp', 180), ('2.0', 180), ('George', 180), ('I', 180), ('Remco', 150), ('Bladel', 150), ('For', 150), ('publishing', 150), ('Score', 150), ('us', 150), ('XPUB', 150), ('magazine', 150), ('Media', 150), ('2018', 150), ('Paradiso', 150), ('This', 150), ('research', 150), ('Vaast', 150), ('Colson', 150), ('Art', 150), ('avant-garde', 150), ('Remörk', 150)]

27
putallhtmltexttogether.py

@ -0,0 +1,27 @@
# Concatenates every HTML file under static/files/ into allhtml.txt, then
# tokenizes that corpus and writes its 50 most frequent non-stopword tokens
# (as a str() of (token, count) pairs) to mostcommon.txt.
# NOTE(review): rendered-diff span; indentation was stripped in this view.
import sys, os
from nltk import word_tokenize
from nltk import everygrams
from nltk import FreqDist
# English stopword list inlined, extended with punctuation and HTML artefacts
# ("DOCTYPE", "html", ...) so markup tokens do not dominate the counts.
stopws = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't", ",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!"]
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
# NOTE(review): append mode without a context manager -- every rerun of
# this script duplicates the whole corpus inside allhtml.txt, which
# inflates all frequency counts below.
total = open("allhtml.txt", "a")
with open(file) as f:
content = f.read()
total.write(content)
total.close()
with open('allhtml.txt') as f:
content = f.read()
tokens = word_tokenize(content)
tokens = [token for token in tokens if token not in stopws]
freq_file=FreqDist(tokens)
# most_common(50) yields (token, count) pairs; str() of that list matches the
# format stored in mostcommon.txt.
listofwords = open("mostcommon.txt", "w+")
listofwords.write(str(freq_file.most_common(50)))
listofwords.close()

BIN
static/files/.DS_Store

Binary file not shown.

0
static/files/00/Participants.html → static/files/00/00.Participants.html

0
static/files/00/PushingScores.html → static/files/00/00.PushingScores.html

0
static/files/00/events.html → static/files/00/00.events.html

5
static/files/04/04.blurb.txt

@ -0,0 +1,5 @@
Here is a description of the event / artist / work.
For the event, it can describe what happened, where it happened, when it happened.
For the artist, it can introduce the artist.
For the work, it can describe the content.

1
static/files/04/04.motivation.txt

@ -0,0 +1 @@
Why was the artist / piece / event included in the collection.

1
static/files/04/04.practice.txt

@ -0,0 +1 @@
Here you can go into detail about an artist’s practice. For example, interviews, personal statements, reviews etc.

2
static/files/05/05.metadata.html

@ -1,4 +1,4 @@
<h1>Type of object: Event</h1>
Type of object: Event
Project:
Description: Presented at WIELS Art Book fair 2016 was this live made copy zine named "Carlson invents, Colson presents: 99 spines produced on a modified Canon IR2016 copy machine" by Vaast Colson. Produced on a 'prepared copier'. The copy machine is amplified by several internal microphones by which the sound of every run is recorded. Each copy-run of 99 copies (the maximum run of the machine) on transparent foil will be accompanied by a foil cover with the dub cut audiofile in it. The image copied in the zine is a drawing which is engraved in the glassplate of the copy machine.
Author: Vaast Colson, De Player

13
static/files/17/17.blurb.html

@ -1,10 +1,4 @@
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>
George Brecht (August 27, 1926 – December 5, 2008), born George Ellis MacDiarmid, was an American conceptual artist and avant-garde composer,
as well as a professional chemist who worked as a consultant for companies including Pfizer, Johnson & Johnson, and Mobil Oil. He was a key member
of, and influence on, Fluxus, the international group of avant-garde artists centred on George Maciunas, having been involved with the group
@ -13,15 +7,10 @@ from the first performances in Wiesbaden 1962 until Maciunas' death in 1978.
One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most
famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art. He described his own art as a way of “ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.”
<br />
<br />
<h2>SUNDOWN VEHICLE EVENT by GEORGE BRECHT</h2>
SUNDOWN VEHICLE EVENT by GEORGE BRECHT
Left: George Brecht, 1961, Two Vehicle Events, Detail of a 3.5 " by 4.5. card. One of many similar kinds of instructions that were given to participants.
Right: Vehicles, drivers & interested students gather before sunset for a performance of George Brecht's "Vehicle Sundown Event".
Location: St Vincent College, Latrobe, PA., parking area behind Sportsman's Hall, 1963.
Steve Joy took me to meet George Brecht in his studio when I was in residence at St Michael's in Manhattan (c.1962). We became friends and GB mailed instruction cards to me. I brought Steve Joy to St Vincent College when I returned to the monastery from Paris in 1963. GB agreed to provide instructions for an event at St Vincent. For his "Vehicle Sundown Event", GB published a set of about 50 cards to be given to participants who participated in the event with their vehicles. Each card held an instruction to be performed with a vehicle. Drivers were instructed to assemble at sundown in a parking lot and randomly park their vehicles. Then each driver, with a shuffled deck of instructions, would begin performing at the sound of a signal. Participants performed about 50 events such as "turn on lights", "start engine", "stop engine", "open window". This work was performed at St Vincent College under the direction of Stephen Joy with Roman Verostko assisting. c. 1963 ( I can confirm that Fr Melvin Ruprecht participated. I believe it was before I went to Washington as NCE editor, rv0
</body>
</html>

136
textedit.py

@ -1,26 +1,132 @@
# NOTE(review): this region is a rendered unified diff -- removed (old) and
# added (new) lines are interleaved and indentation was stripped, so it is not
# runnable exactly as shown. Comments below are hedged accordingly.
# this code is split in two parts:
# going through the description html files and gathering the interesting words in a json file;
# and going through the files again to replace words that also appear in the json with an a href version
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os
import nltk
from nltk import word_tokenize
from nltk.util import trigrams
# text analysis
# NOTE(review): this older analysis() appears to be a removed line from the
# diff; it also reads a name `content` that is not its parameter (`file`),
# so it could only have worked against a module-level global.
def analysis(file):
# print("yes")
file_trigrams = trigrams(content)
print(file_trigrams)
from nltk import sent_tokenize, word_tokenize
from nltk import everygrams
from nltk import FreqDist
import json
import re
"""
PART 1
We create the dictionary and save it.
"""
stopws = [",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!", "'", "<br>", "<br />", "/body", "/html", "/head", "h2", "/h2", "h1", "/h1","",""]
# reading each individual html file
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
# NOTE(review): append mode means reruns duplicate the corpus in
# allhtml.txt; `total` is opened without a context manager.
total = open("allhtml.txt", "a")
with open(file) as f:
content = f.read()
analysis(content)
total.write(content)
total.close()
keyword_list = []
# with open('allhtml.txt') as f:
# content = f.read()
# tokens = word_tokenize(content)
# tokens = [token for token in tokens if token not in stopws]
# freq_file=FreqDist(tokens)
# print(tokens)
# keyword_list.append(freq_file.most_common(50))
# print(keyword_list[0])
# Build the keyword list: every distinct non-stopword token in the corpus.
with open('allhtml.txt') as f:
content = f.read()
tokens = word_tokenize(content)
tokens = [token for token in tokens if token not in stopws]
keyword_list = list(set(tokens))
# print(tokens)
# print(keyword_list)
"""
PART 2
We iterate through the entire collection of html files, tokenize the words, and check to see whether any of them is in the keyword_list. If they are, then we generate a json file.
"""
# wordlist = {}
# avoiding_repetition = []
# Accumulator: keyword -> list of {'id': ..., 'sentence': ...} records,
# serialized to wordlist.json at the end of this section.
sentences_w_word = {}


def analysis(the_word, file_name):
    """Collect every sentence of *file_name* that contains *the_word*.

    Appends the matches to the module-level ``sentences_w_word`` dict under
    the key ``the_word``; each match is ``{'id': <two-char folder id>,
    'sentence': <cleaned sentence>}``.
    """
    # NOTE(review): assumes paths shaped like "static/files/NN/..." so that
    # characters 13-14 are the two-digit item id -- confirm for new layouts.
    doc_id = file_name[13:15]
    with open(file_name) as f:
        content = f.read()
    new_sent_tokens = []
    for sent_token in sent_tokenize(content):
        if the_word in sent_token:
            # Flatten newlines and trim stray quote/markup characters.
            new_sent_tokens.append({'id': doc_id,
                                    'sentence': sent_token.replace('\n', ' ').strip("'<>()")})
    # Bug fix: the original stored under the module-global loop variable
    # `word` instead of the `the_word` parameter; it only worked by accident
    # because the caller's loop variable happened to be named `word`.
    sentences_w_word[the_word] = sentences_w_word.get(the_word, []) + new_sent_tokens


path = "static/files/"
for path, subdirs, files in os.walk(path):
    for name in files:
        if name.endswith('html'):
            file = os.path.join(path, name)
            # O(files x keywords): every keyword is searched in every file.
            for word in keyword_list:
                analysis(word, file)

with open('wordlist.json', 'w', encoding="utf8") as outfile:
    # ensure_ascii=False keeps accented names readable in the JSON output.
    json.dump(sentences_w_word, outfile, ensure_ascii=False)
# def analysis(file, id):
# sent_tokens = sent_tokenize(file) # sentence tokenizing
# for sent_token in sent_tokens:
# tokens = word_tokenize(sent_token) # word tokenizing
# print(tokens)
# for token in tokens:
# for first in keyword_list:
# if token == first: # if token is in keyword_list
# if token not in wordlist:
# wordlist[token] = []
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# elif token not in avoiding_repetition:
# # print(wordlist[token])
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# avoiding_repetition.append(token)
# with open('static/files/17/17.blurb.html') as f:
# content = f.read()
# analysis(content, '17')
# # reading each individual html file
# path = "static/files/"
# for path, subdirs, files in os.walk(path):
# for name in files:
# if name.endswith('html'):
# file = os.path.join(path, name)
# with open(file) as f:
# content = f.read()
# id=name[:2]
# analysis(content, id)
# json_wordlist = json.dumps(wordlist)
# for item in wordlist:
# for item2 in wordlist[item]:
# print(item)
# print(item2["sentence"])
# print("\n")

14
wordlist.json

File diff suppressed because one or more lines are too long

13
wordlist_v2.json

@ -1,13 +0,0 @@
{
"way" : [
{"id": ["17", "He described his own art as a way of 'ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.'"]},
{"id": ["00", "Our ambition, and that of our collaborating partners, is to emancipate graphic notation from the confines of the modernist tradition, in such a way that it may remain an innovative and provocative medium for decades to come."]}
],
"artwork" : [
{"id": ["17", "One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art."]},
{"id": ["00", "It unfolds through a nomadic program which includes the creation of newly commissioned artworks and public events that addres scontemporary questions and issues in this particular field.",
"The discursive program for 2016–2017 will include lectures, presentations of newly commissioned artworks, concert evenings, and workshops."]}
]
}

2
wordlist_v3.json

@ -16,6 +16,8 @@
}
],
"artwork" : [
{
"id" : "17",

Loading…
Cancel
Save