volumetric-regimes-book/command-line/update.py

import urllib.request
import os
import re
import json
import jinja2
# Notes are here: https://pad.vvvvvvaria.org/volumetric-regimes-in-process.
STATIC_FOLDER_PATH = '.' # without trailing slash
PUBLIC_STATIC_FOLDER_PATH = '.' # without trailing slash
TEMPLATES_DIR = './templates'
# This uses a low quality copy of all the images
# (using a folder with the name "images-small",
# which stores a copy of all the images generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 * )
fast = False
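
# To render with the low quality image copies described above, generate the
# images-small/ folder with the mogrify command and set the flag below
# (assumption: this is how the flag is meant to be used, see fast_loader() further down):
#
#   fast = True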

def API_request(url, pagename):
    """
    url = API request url (string)
    data = { 'query':
        'pages' :
            pageid : {
                'links' : {
                    '?' : '?'
                    'title' : 'pagename'
                }
            }
        }
    }
    """
    response = urllib.request.urlopen(url).read()
    data = json.loads(response)

    # Save response as JSON to be able to inspect API call
    json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
    print('Saving JSON:', json_file)
    with open(json_file, 'w') as out:
        out.write(json.dumps(data, indent=4))
        out.close()

    return data
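
# Illustrative call of API_request(), mirroring the parse request that
# parse_page() builds further down (the values are examples, not fixed):
#
#   url = 'https://volumetricregimes.xyz/api.php?action=parse&page=Unfolded&pst=True&format=json'
#   data = API_request(url, 'Unfolded')
#   html = data['parse']['text']['*'] # the page rendered as HTML
#
# As a side effect, the response is stored as ./Unfolded.json for inspection.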

def download_media(html, images, wiki):
    """
    html = string (HTML)
    images = list of filenames (str)
    """
    # check if 'images/' already exists
    if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
        os.makedirs(f'{ STATIC_FOLDER_PATH }/images')

    # tmp list for filename replacements
    replaced = []
    images.sort()
    images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later

    # download media files
    for filename in images:
        filename = filename.replace(' ', '_') # safe filenames

        # check if the image is already downloaded
        # if not, then download the file
        # !!!!!
        # turned off for preparing final files (AUG 2022)
        # !!!!!
        # if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'):
        #     # first we search for the full filename of the image
        #     url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json'
        #     response = urllib.request.urlopen(url).read()
        #     data = json.loads(response)
        #     # we select the first search result
        #     # (assuming that this is the image we are looking for)
        #     image = data['query']['allimages'][0]
        #     # then we download the image
        #     image_url = image['url']
        #     image_filename = image['name']
        #     print('Downloading:', image_filename)
        #     image_response = urllib.request.urlopen(image_url).read()
        #     # and we save it as a file
        #     image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }'
        #     out = open(image_path, 'wb')
        #     out.write(image_response)
        #     out.close()
        #     import time
        #     time.sleep(3) # do not overload the server

        # replace src image link (from wiki folder structure to local folder)
        image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
        img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
        for img_path_pattern in img_path_patterns:
            matches = re.findall(img_path_pattern, html) # for debugging
            if matches:
                for match in matches:
                    if match not in replaced:
                        # print(f' { match } --> { image_path }') # for debugging: each image should have the correct match!
                        html = html.replace(match, image_path)
                        replaced.append(match)

    return html
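
# Illustrative rewrite performed by download_media(): a (hypothetical) wiki
# image path such as
#
#   /images/thumb/a/a1/Example_image.png/600px-Example_image.png
#
# would be replaced in the HTML by the local copy
#
#   ./images/Example_image.png
#
# assuming Example_image.png appears in the 'images' list returned by the API.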

def add_item_inventory_links(html):
    """
    html = string (HTML)
    """
    # THROUGHOUT THE BOOK
    # Find all references in the text to the item index
    matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mentioning of Item ###
    index = {}
    for match in matches:
        item_match = re.search(r'Item \d\d\d', match).group()
        number = item_match.replace('Item ', '').strip()
        text_before = re.search(rf'\w.*?Item { number }', match).group().replace(f'Item { number }', '')
        text_after = re.search(rf'Item { number }.*?\w\w\w', match).group().replace(f'Item { number }', '')
        if not number in index:
            index[number] = []
            count = 1
        else:
            count = index[number][-1] + 1
        index[number].append(count)
        item_id = f'ii-{ number }-{ index[number][-1] }'
        # print(f'match: { number } --> { item_id } --> { match }')
        html = html.replace(match, f'{ text_before }<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text_after }')

    # IN THE ITEM INDEX
    # Also add a <span> around the index nr to style it
    matches = re.findall(r'<li>\d\d\d', html)
    for match in matches:
        number = match.replace('<li>', '')
        html = html.replace(match, f'<li><span class="item_nr">{ number }</span>')

    # print("\n-------------\n")
    # print("The following items ('###') appear [#, #, ...] many times in the book:\n")
    sorted_index = dict(sorted(index.items()))
    # print(sorted_index)
    # print("\n-------------\n")

    return html
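
# Illustrative effect of add_item_inventory_links() (the sentence is made up,
# the markup follows the code above): the first mention of
#
#   ... as described in Item 042, the ...
#
# becomes
#
#   ... as described in <a id="ii-042-1" href="#Item_Index">Item 042</a>, the ...
#
# and in the item index itself, <li>042 becomes
# <li><span class="item_nr">042</span>.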

def tweaking(html):
    """
    html = string (HTML)
    """
html = html.replace('<a href="#X,_y,_z_(4_filmstills)"', '<a href="#x,_y,_z_(4_filmstills)"') # change the anchor link in the TOC to lowercase
html = html.replace('<a href="#Rehearsal_as_the_%E2%80%98Other%E2%80%99_to_Hypercomputation"', '<a href="#Rehearsal_as_the_Other_to_Hypercomputation"') # change the anchor link in the TOC to lowercase
html = html.replace('<a href="#We_hardly_encounter_anything_that_didn%E2%80%99t_really_matter"', '<a href="#We_hardly_encounter_anything_that_didnt_really_matter"') # change the anchor link in the TOC to lowercase
html = re.sub(r'''<h3><span class="mw-headline" id="References.*?">References</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h3>
<ul>''', '''<h3 class="references"><span class="mw-headline" id="References">References</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h3>
<ul class="references">''', html) # add id="references" to h3 and ul, so the elements can be selected with CSS
html = html.replace('src="./images/Userinfo.jpg"', 'src="./images/Userinfo.svg"') # This image is not on the wiki
html = html.replace('srcset="./images/Userinfo.jpg 1.5x, ./images/Userinfo.jpg 2x"', 'srcset="./images/Userinfo.svg 1.5x, ./images/Userinfo.svg 2x"') # This image is not on the wiki
html = html.replace('src="./images/Continuum_brighton.png"', 'src="./images/Continuum_brighton.svg"') # This image is not on the wiki
html = html.replace('srcset="./images/Continuum_brighton.png 1.5x, ./images/Continuum_brighton.png 2x"', 'srcset="./images/Continuum_brighton.svg 1.5x, ./images/Continuum_brighton.svg 2x"') # This image is not on the wiki
# html = html.replace('src="./images/Topology-typography-1A.png"', 'src="./images/Topology-typography-1A.svg"') # This image is not on the wiki
# html = html.replace('src="./images/Topology-typography-1B.png"', 'src="./images/Topology-typography-1B.svg"') # This image is not on the wiki
# html = html.replace('src="./images/Topology-typography-2A.png"', 'src="./images/Topology-typography-2A.svg"') # This image is not on the wiki
# html = html.replace('src="./images/Topology-typography-2B.png"', 'src="./images/Topology-typography-2B.svg"') # This image is not on the wiki
# html = html.replace('srcset="./images/Topology-typography-1A.png"', 'srcset="./images/Topology-typography-1A.svg"') # This image is not on the wiki
# html = html.replace('srcset="./images/Topology-typography-1B.png"', 'srcset="./images/Topology-typography-1B.svg"') # This image is not on the wiki
# html = html.replace('srcset="./images/Topology-typography-2A.png"', 'srcset="./images/Topology-typography-2A.svg"') # This image is not on the wiki
# html = html.replace('srcset="./images/Topology-typography-2B.png"', 'srcset="./images/Topology-typography-2B.svg"') # This image is not on the wiki
html = html.replace('trans*feminis', 'trans✶feminis') # changing stars
html = html.replace('Trans*feminis', 'Trans✶feminis') # changing stars
html = html.replace('star (*)', 'star (✶)') # changing stars
html = html.replace('Our trans*feminist lens is sharpened by queer and anti-colonial sensibilities, and oriented towards (but not limited to) trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical forms of study.', 'Our trans✶feminist lens is sharpened by queer and anti-colonial sensibilities, and oriented towards (but not limited to) trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical forms of study.') # changing stars
html = html.replace('<h2><span class="mw-headline" id="Invasive_imagination_and_its_agential_cuts">Invasive imagination and its agential cuts</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Invasive_imagination_and_its_agential_cuts">Invasive imagination <br>and its agential cuts</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('<h2><span class="mw-headline" id="Volumetric_Regimes:_Material_cultures_of_quantified_presence">Volumetric Regimes: Material cultures of quantified presence</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Volumetric_Regimes:_Material_cultures_of_quantified_presence">Volumetric Regimes:<br>Material cultures of<br>quantified presence</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('<h2><span id="Somatopologies_(materials_for_a_movie_in_the_making)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies (materials for a movie in the making)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span id="Somatopologies_(materials_for_a_movie_in_the_making)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies (materials<br> for a movie in the making)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('<h1><span class="mw-headline" id="Signs_of_clandestine_disorder:_The_continuous_aftermath_of_3D-computationalism"><a href="#Clandestine_disorder" title="Clandestine disorder">Signs of clandestine disorder: The continuous aftermath of 3D-computationalism</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Signs_of_clandestine_disorder:_The_continuous_aftermath_of_3D-computationalism"><a href="#Clandestine_disorder" title="Clandestine disorder">Signs of clandestine disorder:<br>The continuous<br>aftermath of 3D-<br>computationalism</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
html = html.replace('<h2><span class="mw-headline" id="The_Industrial_Continuum_of_3D">The Industrial Continuum of 3D</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="The_Industrial_Continuum_of_3D">The Industrial Continuum <br>of 3D</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('T*fRP', 'T✶fRP')
html = html.replace('trans*', 'trans✶')
html = html.replace('Trans*', 'trans✶')
html = html.replace('(*)', '(✶)')
    html = html.replace('✶', '<span class="star">✶</span>') # wrap each star in a span, so it can be styled
    html = html.replace('<p><a href="#File', '<p class="image"><a href="#File') # give <p>'s that contain an non-thumb image a .image class
    html = html.replace(' – ', '<span class="endash"> – </span>') # control the white spaces around an endash
    html = html.replace(' — so we do!”', ' — so<br>we do!”') # force line break
    html = html.replace('I find gestationality useful and very exciting.', 'I find gestationality useful and very<br>exciting.') # force line break
    html = html.replace('world.html https://docs.blender.org/manual/en/dev/rende', 'world.html<br>https://docs.blender.org/manual/en/dev/rende') # force line break
    html = html.replace('Nerea Calvillo, Eric Snodgrass', 'Nerea Calvillo, Eric <br>Snodgrass') # force line break

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # Missing sentences hack zone........
    html = html.replace('from on-line hosting, designing, peer-reviewing', 'from on-line hosting, designing, </p><div class="page-break"></div><div class="no-text-indent"><p>peer-reviewing')
    html = html.replace('''revolving of all matters.
</p>''', '''revolving of all matters.</p></div>
''')
    html = html.replace('in an efficient manner, combining positivist science', 'in an efficient manner,<br />combining positivist science')
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    html = html.replace('src="./images/Barcode.png"', 'src="./cover/Barcode.svg"') # This image is not on the wiki
    html = html.replace('src="./images/OHP-logo-title.png"', 'src="./cover/OHP-logo-title.svg"') # This image is not on the wiki

    pattern1 = r'''<h2><span class="mw-headline" id=".*">.*</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>.*?</b>
</p>''' # title + author
    pattern2 = r'''<h2><span class="mw-headline" id=".*?">.*?</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<h2 style="display:none;"><span class="mw-headline" id=".*?">.*?</span></h2>
<p><b>.*?</b>
</p>''' # exceptions: custom running headers
    pattern3 = r'''<h2><span class="mw-headline" id=".*?">.*?</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>''' # only title
    pattern4 = r'''<h2><span id="x,_y,_z_\(4_filmstills\)"></span><span class="mw-headline" id="x.2C_y.2C_z_.284_filmstills.29">x, y, z \(4 filmstills\)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Jara Rocha, Femke Snelting</b>
</p>'''
    pattern5 = r'''<h2><span id="Somatopologies_\(materials_for_a_movie_in_the_making\)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies \(materials<br> for a movie in the making\)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Possible Bodies \(Jara Rocha, Femke Snelting\)</b>
</p>'''
pattern6 = r'''<h2><span id="Rehearsal_as_the_\Other\_to_Hypercomputation"></span><span class="mw-headline" id="Rehearsal_as_the_\.E2\.80\.98Other\.E2\.80\.99_to_Hypercomputation">Rehearsal as the \Other\ to Hypercomputation</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Maria Dada</b>
</p>'''
pattern7 = r'''<h2><span id="We_hardly_encounter_anything_that_didnt_really_matter"></span><span class="mw-headline" id="We_hardly_encounter_anything_that_didn\.E2\.80\.99t_really_matter">We hardly encounter anything that didnt really matter</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Phil Langley in conversation with Possible Bodies</b>
</p>'''
    results = re.findall(rf'{pattern1}|{pattern2}|{pattern3}|{pattern4}|{pattern5}|{pattern6}|{pattern7}', html)
    for match in results:
        html = html.replace(match, f'<div class="title-wrapper">{ match }</div>')

    # # add a "word break opportunity" after each hyphen in compound words, but avoid url's and class names
    # pattern = r'(?!\b[/|"])([a-z][a-z][a-z][-])'
    # results = re.findall(rf'{ pattern }', html)
    # for match in results:
    #     print(match)
    #     html = html.replace(match, f'{ match }<wbr>')

    # html = html.replace('.png', '.jpg') # Using only jpg version of the images in the BW version, June 2022
    # html = html.replace('.gif', '.jpg') # Using only jpg version of the images in the BW version, June 2022
    html = html.replace('Topology-typography-1A.jpg', 'Topology-typography-1A.png') # Using png's for Spec, Aug 2022
    html = html.replace('Topology-typography-1B.jpg', 'Topology-typography-1B.png') # Using png's for Spec, Aug 2022
    html = html.replace('Topology-typography-2A.jpg', 'Topology-typography-2A.png') # Using png's for Spec, Aug 2022
    html = html.replace('Topology-typography-2B.jpg', 'Topology-typography-2B.png') # Using png's for Spec, Aug 2022
    html = html.replace('sky is black <br />and the ground is yellow.<br /><br />', 'sky is black <br />and the ground is yellow.<br />') # Aug 2022
    html = html.replace('<div class="contribution 2.5d-romance">', '<div class="contribution romance">') # Aug 2022
    html = html.replace(
        '''</p><p>3D computation has historically co-evolved with Modern technosciences, and aligned with the regimes of optimization, normalization and hegemonic world order. The legacies and projections of industrial development leave traces of that imaginary and tell the stories of a lively tension between “the probable” and “the possible”. Defined as the techniques for measuring volumes, volumetrics all too easily (re)produce and accentuate the probable, and this process is intensified within the technocratic realm of contemporary hyper-computation.
</p><p>This book brings together diverse materials from an ongoing trans<span class="star">✶</span>feminist conversation between artists, software developers and theorists working with techniques and technologies for detecting, tracking, capturing, printing, modeling and rendering volumes.''',
        '''</p><p>3D computation has historically co-evolved with Modern technosciences, and aligned with the regimes of optimiza-<br>tion, normalization and hegemonic world order. The lega-<br>cies and projections of industrial development leave traces of that imaginary and tell the stories of a lively tension <br>between “the probable” and “the possible”. Defined as the techniques for measuring volumes, volumetrics all too easily (re)produce and accentuate the probable, and this process is intensified within the technocratic realm of <br>contemporary hyper-computation.
</p><p>This book brings together diverse materials from an ongoing trans<span class="star">✶</span>feminist conversation between artists, software developers and theorists working with <br>techniques and technologies for detecting, tracking, capturing, printing, modeling and rendering volumes.''') # Aug 2022

    return html

def clean_up(html):
    """
    html = string (HTML)
    """
    html = re.sub(r'\[.*edit.*\]', '', html) # remove the [edit]
    html = re.sub(r'href="/index.php\?title=', 'href="#', html) # remove the internal wiki links
    html = re.sub(r'&#91;(?=\d)', '', html) # remove left footnote bracket [
    html = re.sub(r'(?<=\d)&#93;', '', html) # remove right footnote bracket ]
    return html
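
# Illustrative effect of clean_up() (example HTML, not taken from the book):
#
#   <a href="/index.php?title=Item_Index">Item Index</a> ... text&#91;1&#93;
#
# becomes (the footnote number usually sits inside a <sup> element):
#
#   <a href="#Item_Index">Item Index</a> ... text1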

def fast_loader(html):
    """
    html = string (HTML)
    """
    if fast == True:
        html = html.replace('/images/', '/images-small/')
        print('--- rendered in FAST mode ---')
    return html

def parse_page(pagename, wiki):
    """
    pagename = string
    html = string (HTML)
    """
    parse = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
    data = API_request(parse, pagename)
    # print(json.dumps(data, indent=4))
    if 'parse' in data:
        html = data['parse']['text']['*']
        images = data['parse']['images']
        html = download_media(html, images, wiki)
        html = clean_up(html)
        html = add_item_inventory_links(html)
        html = tweaking(html)
        html = fast_loader(html)
    else:
        html = None
    return html

def save(html, pagename):
    """
    html = string (HTML)
    pagename = string
    """
    if __name__ == "__main__":
        # command-line

        # save final page that will be used with PagedJS
        template_file = open(f'{ STATIC_FOLDER_PATH }/{ TEMPLATES_DIR }/template.html').read()
        template = jinja2.Template(template_file)
        doc = template.render(publication_unfolded=html, title=pagename)
        html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.html'
        print('Saving HTML:', html_file)
        with open(html_file, 'w') as out:
            out.write(doc)
            out.close()

        # save extra html page for debugging (CLI only)
        template_file = open(f'{ STATIC_FOLDER_PATH }/{ TEMPLATES_DIR }/template.inspect.html').read()
        template = jinja2.Template(template_file)
        doc = template.render(publication_unfolded=html, title=pagename)
        html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html'
        print('Saving HTML:', html_file)
        with open(html_file, 'w') as out:
            out.write(doc)
            out.close()
    else:
        # Flask application
        with open(f'{ STATIC_FOLDER_PATH }/Unfolded.html', 'w') as out:
            out.write(html) # save the html to a file (without <head>)

def update_material_now(pagename, wiki):
    """
    pagename = string
    publication_unfolded = string (HTML)
    """
    publication_unfolded = parse_page(pagename, wiki)
    return publication_unfolded

# ---

if __name__ == "__main__":
    wiki = 'https://volumetricregimes.xyz' # without trailing slash
    pagename = 'Unfolded'
    publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page
    save(publication_unfolded, pagename) # save the page to file
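
# Typical command-line use (assuming Python 3 and the jinja2 package are
# installed, and running from the folder that holds this script and templates/):
#
#   $ python3 update.py
#
# This fetches the "Unfolded" page from the wiki and writes Unfolded.json,
# Unfolded.html (the PagedJS version) and Unfolded.inspect.html into the
# current folder. When the module is imported by the Flask application
# instead, save() writes the parsed HTML to Unfolded.html without the template.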