so many changes

This commit is contained in:
Cristina Cochior 2019-05-04 16:27:50 +02:00
parent e8fa4acbff
commit 313554644b
23 changed files with 41001 additions and 97 deletions

BIN
.DS_Store vendored

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

40821
allhtml.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -36,7 +36,7 @@ pathofwords = []
pathofnumbers = []
#reading wordlist.json
with open('wordlist_v3.json', 'r') as f:
with open('wordlist.json', 'r') as f:
wordlist_dict = json.load(f)
@ -118,10 +118,6 @@ def description():
if file.lower().endswith(('.html')):
with open("static/"+file,"r", encoding='utf-8') as f:
textfile = f.read()
word = "way"
wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
textfile = re.sub(word, wordlinked, textfile)
textfile = Markup(textfile)
return render_template('description.html', datafromjson=datafromjson, itemid=itemid, textfile=textfile, idno=idno)

18
generate_links.py Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
"""Walk static/files/ and, in every HTML file, wrap each keyword from
wordlist.json in an <a href='/diverge?search=...'> link."""
import sys, os
import json
import re

# wordlist.json maps keyword -> list of {id, sentence} records; only the
# keys (the keywords) are used here.
with open('wordlist.json', 'r', encoding='utf-8') as f:
    wordlist_dict = json.load(f)

path = "static/files/"
for path, subdirs, files in os.walk(path):
    for name in files:
        if name.endswith('html'):
            file = os.path.join(path, name)
            with open(file, encoding='utf-8') as f:
                textfile = f.read()
            for word in wordlist_dict:
                wordlinked = "<a href='/diverge?search="+word+"'>"+word+"</a>"
                # re.escape: keywords may contain regex metacharacters,
                # which would crash or mis-match when used as a raw pattern.
                # \b restricts the match to whole words, so substrings of
                # longer words and of previously inserted <a> markup are
                # not clobbered by later substitutions.
                textfile = re.sub(r"\b" + re.escape(word) + r"\b", wordlinked, textfile)
            # NOTE(review): the linked text is never written back to disk,
            # so this script currently has no effect — confirm whether an
            # output step is missing or was dropped in this commit.

View File

@ -1,38 +0,0 @@
import json
# # to iterate through existing json file and find the correct json file
# def find_json(id):
# get path/to/file
# return file
# #
# def save_json(id, name, email, friend, content):
# file
# data = {"id": "path/to/file", "name":,"email":,"friend":,"content":}
# with open('file.json', 'w') as f:
# json.dump(data, f)
# def jaction(original, id, name, email, friend, content):
# f = find_json_file(id)
# data = make_dict(f)
# updated = update_dict(data, name, email, friend, content)
# save_json_file(f, updated)
# # to find the file with the correct id
# def find_json_file():
# f = open('file.json', 'w')
# iterate files to find id
# return f
# # saving the json file
# def save_json_file(name, email, friend, content):
# dict= request.args.get(
# write(file, json.dump(data))

1
mostcommon.txt Normal file
View File

@ -0,0 +1 @@
[('graphic', 540), ('sound', 510), ('Rotterdam', 480), ('nl', 480), ('music', 450), ('notation', 420), ('project', 420), ('de', 390), ('new', 360), ('The', 360), ('DE', 360), ('PLAYER', 360), ('TGC', 330), ('art', 300), ('3', 300), ('van', 270), ('performance', 270), ('Gamma', 270), ('Circulaire', 270), ('event', 240), ('Tetra', 240), ("'", 240), ('score', 210), ('release', 210), ('Kris', 210), ('2017', 180), ('artists', 180), ('scores', 180), ('Antwerp', 180), ('2.0', 180), ('George', 180), ('I', 180), ('Remco', 150), ('Bladel', 150), ('For', 150), ('publishing', 150), ('Score', 150), ('us', 150), ('XPUB', 150), ('magazine', 150), ('Media', 150), ('2018', 150), ('Paradiso', 150), ('This', 150), ('research', 150), ('Vaast', 150), ('Colson', 150), ('Art', 150), ('avant-garde', 150), ('Remörk', 150)]

27
putallhtmltexttogether.py Normal file
View File

@ -0,0 +1,27 @@
"""Concatenate every HTML file under static/files/ into allhtml.txt, then
write the 50 most frequent non-stopword tokens to mostcommon.txt."""
import sys, os
from nltk import word_tokenize
from nltk import everygrams
from nltk import FreqDist

# Tokens dropped before counting: NLTK's English stopword list plus
# punctuation and HTML artifacts.
stopws = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't", ",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!"]
# A set makes the membership test in the filter below O(1) instead of a
# linear scan of the list for every token.
stopws = set(stopws)

# Pass 1: append each HTML file's contents to allhtml.txt. The output file
# is opened once, via a context manager, instead of once per input file.
# NOTE(review): mode "a" accumulates across runs, so re-running the script
# duplicates the corpus — confirm whether "w" is intended.
path = "static/files/"
with open("allhtml.txt", "a") as total:
    for path, subdirs, files in os.walk(path):
        for name in files:
            if name.endswith('html'):
                file = os.path.join(path, name)
                with open(file) as f:
                    total.write(f.read())

# Pass 2: tokenize the combined corpus, drop stopwords, and record the 50
# most common tokens.
with open('allhtml.txt') as f:
    content = f.read()
tokens = [token for token in word_tokenize(content) if token not in stopws]
freq_file = FreqDist(tokens)
with open("mostcommon.txt", "w") as listofwords:
    listofwords.write(str(freq_file.most_common(50)))

BIN
static/files/.DS_Store vendored

Binary file not shown.

View File

@ -0,0 +1,5 @@
Here is a description of the event / artist / work.
For the event, it can describe what happened, where it happened, when it happened.
For the artist, it can introduce the artist.
For the work, it can describe the content.

View File

@ -0,0 +1 @@
Why was the artist / piece / event included in the collection?

View File

@ -0,0 +1 @@
Here you can write at length about an artist's practice. For example: interviews, personal statements, reviews, etc.

View File

@ -1,4 +1,4 @@
<h1>Type of object: Event</h1>
Type of object: Event
Project:
Description: Presented at WIELS Art Book fair 2016 was this live made copy zine named "Carlson invents, Colson presents: 99 spines produced on a modified Canon IR2016 copy machine" by Vaast Colson. Produced on a 'prepared copier'. The copy machine is amplified by several internal microphones by which the sound of every run is recorded. Each copy-run of 99 copies (the maximum run of the machine) on transparent foil will be accompanied by a foil cover with the dub cut audiofile in it. The image copied in the zine is a drawing which is engraved in the glassplate of the copy machine.
Author: Vaast Colson, De Player

View File

@ -1,10 +1,4 @@
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>
George Brecht (August 27, 1926 – December 5, 2008), born George Ellis MacDiarmid, was an American conceptual artist and avant-garde composer,
as well as a professional chemist who worked as a consultant for companies including Pfizer, Johnson & Johnson, and Mobil Oil. He was a key member
of, and influence on, Fluxus, the international group of avant-garde artists centred on George Maciunas, having been involved with the group
@ -13,15 +7,10 @@ from the first performances in Wiesbaden 1962 until Maciunas' death in 1978.
One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most
famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art. He described his own art as a way of “ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.”
<br />
<br />
<h2>SUNDOWN VEHICLE EVENT by GEORGE BRECHT</h2>
SUNDOWN VEHICLE EVENT by GEORGE BRECHT
Left: George Brecht, 1961, Two Vehicle Events, Detail of a 3.5 " by 4.5. card. One of many similar kinds of instructions that were given to participants.
Right: Vehicles, drivers & interested students gather before sunset for a performance of George Brecht's "Vehicle Sundown Event".
Location: St Vincent College, Latrobe, PA., parking area behind Sportsman's Hall, 1963.
Steve Joy took me to meet George Brecht in his studio when I was in residence at St Michael's in Manhattan (c.1962). We became friends and GB mailed instruction cards to me. I brought Steve Joy to St Vincent College when I returned to the monastery from Paris in 1963. GB agreed to provide instructions for an event at St Vincent. For his "Vehicle Sundown Event", GB published a set of about 50 cards to be given to participants who participated in the event with their vehicles. Each card held an instruction to be performed with a vehicle. Drivers were instructed to assemble at sundown in a parking lot and randomly park their vehicles. Then each driver, with a shuffled deck of instructions, would begin performing at the sound of a signal. Participants performed about 50 events such as "turn on lights", "start engine", "stop engine", "open window". This work was performed at St Vincent College under the direction of Stephen Joy with Roman Verostko assisting. c. 1963 ( I can confirm that Fr Melvin Ruprecht participated. I believe it was before I went to Washington as NCE editor, rv0
</body>
</html>

View File

@ -1,26 +1,132 @@
# this code is split in two parts:
# going through the description html files and gathering the interesting words in a json file;
# and going through the files again to replace words that also appear in the json with an a href version
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os
import nltk
from nltk import word_tokenize
from nltk.util import trigrams
from nltk import sent_tokenize, word_tokenize
from nltk import everygrams
from nltk import FreqDist
import json
import re
# text analysis
def analysis(file):
    """Print the trigrams of *file*.

    *file* is the raw text of one HTML file — the caller passes the file
    CONTENTS, not a path, so nltk.trigrams yields character-level 3-grams.
    """
    # Fix: use the parameter. The original read the module-level `content`
    # global, silently ignoring what the caller passed in.
    file_trigrams = trigrams(file)
    # Fix: trigrams() returns a lazy generator; materialize it so the
    # trigrams themselves are printed, not "<generator object ...>".
    print(list(file_trigrams))
"""
PART 1
We create the dictionary and save it.
"""
stopws = [",", ".", "?","!",":","(",")",">","<","@","#","``","/","","''","","-","", "DOCTYPE", "html", "!", "'", "<br>", "<br />", "/body", "/html", "/head", "h2", "/h2", "h1", "/h1","",""]
# reading each individual html file
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
total = open("allhtml.txt", "a")
with open(file) as f:
content = f.read()
analysis(content)
total.write(content)
total.close()
keyword_list = []
# with open('allhtml.txt') as f:
# content = f.read()
# tokens = word_tokenize(content)
# tokens = [token for token in tokens if token not in stopws]
# freq_file=FreqDist(tokens)
# print(tokens)
# keyword_list.append(freq_file.most_common(50))
# print(keyword_list[0])
with open('allhtml.txt') as f:
content = f.read()
tokens = word_tokenize(content)
tokens = [token for token in tokens if token not in stopws]
keyword_list = list(set(tokens))
# print(tokens)
# print(keyword_list)
"""
PART 2
We iterate through the entire collection of html files, tokenize the words, and check to see whether any of them is in the keyword_list. If they are, then we generate a json file.
"""
# wordlist = {}
# avoiding_repetition = []
# keyword -> list of {'id': ..., 'sentence': ...} records, accumulated
# across every file analysis() has been run on.
sentences_w_word = {}

def analysis(the_word, file_name):
    """Collect every sentence of *file_name* that contains *the_word*.

    Appends {'id': ..., 'sentence': ...} records to the module-level
    ``sentences_w_word`` dict under the key *the_word*.
    """
    # Characters 13-15 of the path serve as the document id — presumably
    # paths look like "static/files/NN/..."; TODO confirm against the
    # os.walk caller. (Renamed from `id`, which shadowed the builtin.)
    doc_id = file_name[13:15]
    with open(file_name) as f:
        content = f.read()
    new_sent_tokens = []
    for sent_token in sent_tokenize(content):
        if the_word in sent_token:
            new_sent_tokens.append({'id': doc_id,
                                    'sentence': sent_token.replace('\n', ' ').strip("'<>()")})
    if the_word in sentences_w_word:  # not the first file for this word
        full_sent_tokens = sentences_w_word[the_word] + new_sent_tokens
    else:
        full_sent_tokens = new_sent_tokens
    # BUG FIX: the original stored under the *global* `word` (the caller's
    # loop variable) instead of the parameter — it only worked by accident
    # and raised NameError when analysis() was called on its own.
    sentences_w_word[the_word] = full_sent_tokens
path = "static/files/"
for path, subdirs, files in os.walk(path):
for name in files:
if name.endswith('html'):
file = os.path.join(path, name)
for word in keyword_list:
analysis(word, file)
with open('wordlist.json', 'w', encoding="utf8") as outfile:
json.dump(sentences_w_word, outfile, ensure_ascii=False)
# def analysis(file, id):
# sent_tokens = sent_tokenize(file) # sentence tokenizing
# for sent_token in sent_tokens:
# tokens = word_tokenize(sent_token) # word tokenizing
# print(tokens)
# for token in tokens:
# for first in keyword_list:
# if token == first: # if token is in keyword_list
# if token not in wordlist:
# wordlist[token] = []
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# elif token not in avoiding_repetition:
# # print(wordlist[token])
# sent_dict = {}
# sent_dict["id"]=id
# sent_dict["sentence"] = sent_token.replace('\n', ' ')
# wordlist[token].append(sent_dict)
# avoiding_repetition.append(token)
# with open('static/files/17/17.blurb.html') as f:
# content = f.read()
# analysis(content, '17')
# # reading each individual html file
# path = "static/files/"
# for path, subdirs, files in os.walk(path):
# for name in files:
# if name.endswith('html'):
# file = os.path.join(path, name)
# with open(file) as f:
# content = f.read()
# id=name[:2]
# analysis(content, id)
# json_wordlist = json.dumps(wordlist)
# for item in wordlist:
# for item2 in wordlist[item]:
# print(item)
# print(item2["sentence"])
# print("\n")

File diff suppressed because one or more lines are too long

View File

@ -1,13 +0,0 @@
{
"way" : [
{"id": ["17", "He described his own art as a way of 'ensuring that the details of everyday life, the random constellations of objects that surround us, stop going unnoticed.'"]},
{"id": ["00", "Our ambition, and that of our collaborating partners, is to emancipate graphic notation from the confines of the modernist tradition, in such a way that it may remain an innovative and provocative medium for decades to come."]}
],
"artwork" : [
{"id": ["17", "One of the originators of 'participatory' art, in which the artwork can only be experienced by the active involvement of the viewer, he is most famous for his Event Scores such as Drip Music 1962, and is widely seen as an important precursor to conceptual art."]},
{"id": ["00", "It unfolds through a nomadic program which includes the creation of newly commissioned artworks and public events that addres scontemporary questions and issues in this particular field.",
"The discursive program for 20162017 will include lectures, presentations of newly commissioned artworks, concert evenings, and workshops."]}
]
}

View File

@ -16,6 +16,8 @@
}
],
"artwork" : [
{
"id" : "17",