# volumetric-regimes-book/command-line/update.py

import urllib.request
import os
import re
import json
import jinja2

# Folder this script writes to: the saved JSON and HTML files and the images/ folder.
# The templates are also looked up relative to it (see WRAPPING_TEMPLATES_DIR).
STATIC_FOLDER_PATH = '.' # without trailing slash
# Prefix used for the rewritten <img src="..."> links in download_media()
PUBLIC_STATIC_FOLDER_PATH = '.' # without trailing slash
# Template folder, appended to STATIC_FOLDER_PATH in save()
WRAPPING_TEMPLATES_DIR = './templates'

# Fast mode: when True, fast_loader() rewrites '/images/' links to '/images-small/'.
# That folder is expected to hold a low quality copy of all the images,
# which can be generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 *
fast = False

def API_request(url, pagename):
	"""
		Request a JSON document from the wiki API and keep a copy of it on disk.

		url = API request url (string)
		pagename = string, names the saved copy: <STATIC_FOLDER_PATH>/<pagename>.json

		Returns the decoded JSON response. Its shape depends on the API action;
		for a links query it looks like:
		data =  { 'query':
					'pages' :
						pageid : {
							'links' : {
								'?' : '?'
								'title' : 'pagename'
							}
						}
					}
				}
		parse_page() in this file expects a 'parse' key instead.
	"""
	# Close the HTTP response as soon as it is read,
	# instead of leaving the connection to the garbage collector
	with urllib.request.urlopen(url) as response:
		data = json.loads(response.read())

	# Save response as JSON to be able to inspect API call
	json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
	print('Saving JSON:', json_file)
	with open(json_file, 'w') as out:
		# the with-block closes the file, no explicit close() is needed
		out.write(json.dumps(data, indent=4))

	return data

def download_media(html, images, wiki):
	"""
		Download the images of a page and point the HTML to the local copies.

		html = string (HTML)
		images = list of filenames (str)
		wiki = base URL of the wiki (string, without trailing slash)

		Images are stored in <STATIC_FOLDER_PATH>/images and are only
		downloaded when no file with that name is there yet.

		Returns the HTML (string) in which the src="/book/images/..."
		attributes of these images are replaced by
		src="<PUBLIC_STATIC_FOLDER_PATH>/images/<filename>".
	"""
	import time
	from urllib.parse import quote

	# make sure 'images/' exists
	images_dir = f'{ STATIC_FOLDER_PATH }/images'
	os.makedirs(images_dir, exist_ok=True)

	for filename in images:
		filename = filename.replace(' ', '_') # safe filenames

		# only download the image when it is not there yet
		if not os.path.isfile(f'{ images_dir }/{ filename }'):

			# first we search for the full filename of the image
			# (the name is quoted, so that characters like & or é do not break the request)
			query_name = quote(filename, safe='')
			url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ query_name }&format=json'
			with urllib.request.urlopen(url) as response:
				data = json.loads(response.read())

			# we select the first search result
			# (assuming that this is the image we are looking for)
			results = data.get('query', {}).get('allimages', [])
			if results:
				image = results[0]

				# then we download the image
				image_url = image['url']
				image_filename = image['name']
				print('Downloading:', image_filename)
				with urllib.request.urlopen(image_url) as image_response:
					image_data = image_response.read()

				# and we save it as a file
				# NOTE: the name comes from the search result; when it differs from
				# `filename`, the check above will not find it on the next run
				with open(f'{ images_dir }/{ image_filename }', 'wb') as out:
					out.write(image_data)
			else:
				# an empty search result used to end in an IndexError
				print('WARNING, no image found for:', filename)

			time.sleep(3) # do not overload the server

		# replace src link
		# (the public path can differ from the folder we save to, e.g. when flask
		# serves the images from the root of the domain)
		image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }'
		new_src = f'src="{ image_path }"'

		# The filename is escaped, so that . ( ) + are matched literally,
		# and [^"] keeps a match inside one src attribute: .*? could start
		# at an earlier <img> on the same line and swallow it.
		escaped = re.escape(filename)
		thumb_pattern = rf'src="/book/images/[^"]*?px-{ escaped }"'
		any_pattern = rf'src="/book/images/[^"]*?{ escaped }"'

		# Thumbnails (...px-<filename>) go first, to avoid overriding
		# Image01.png with 01.png; the looser pattern is only the fallback.
		pattern = thumb_pattern if re.search(thumb_pattern, html) else any_pattern

		# a function as replacement keeps backslashes in the path literal
		html = re.sub(pattern, lambda match: new_src, html)

	return html

def insert_variable_geometry(html):
	"""
		html = string (HTML)

		Returns the HTML in which every "$multi" placeholder is replaced
		by the <script> element below (a sketch with a setup() and a draw() function).
	"""
	placeholder = "$multi"
	sketch_script = """
<script>
	/*Sketch.js from the Chapter Variable Geometry in Aesthetic Programming - A Handbook of Software Studies, by Winnie Soon & Geoff Cox (2020) - http://aesthetic-programming.net/*/

	/*Inspired by David Reinfurt's work - Multi*/
	let moving_size = 50;
	let static_size = 20;

	function setup() {
		createCanvas(windowWidth, windowHeight);
		frameRate(15);
	}

	function draw() {
		//background
		background(230);
		//left
		noStroke()
		fill(0);
		rect(97, 169, 79, 12);

		//right
		rect(365, 184, 20, 15);
		fill(20, 20, 120);

		beginShape();
		vertex(365, 199);
		vertex(385, 199);
		vertex(372, 216);
		vertex(358, 216);
		endShape(CLOSE);

		//bottom
		noFill();
		stroke(130);
		strokeWeight(2);
		ellipse(255, 350, static_size, static_size);

		//mouse interactions
		stroke(180);
		ellipse(mouseX, mouseY, moving_size, moving_size);

		if (mouseIsPressed) {
			static_size = floor(random(5, 20));
		}
	}
</script>"""
	return html.replace(placeholder, sketch_script)

def add_item_inventory_links(html):
	"""
		Link the item references in the text to the item index.

		html = string (HTML)

		Every "Item" followed by a space and three digits becomes
			Item <a id="ii-<number>-<count>" href="#Item_Index"><number></a>
		where <count> is 1 for the first reference to that number, 2 for
		the second, and so on, so each anchor has its own id (also when
		one sentence refers to the same item twice).

		The three digit number at the start of a list item is wrapped in
			<span class="item_nr">...</span>
		to be able to style it.

		Returns the new HTML (string). The text is processed in one pass
		without a sentence tokenizer, so all whitespace of the input is kept.
	"""
	# number of references seen so far, for each item number
	seen = {}

	def link_reference(match):
		# called for each reference, in the order they appear in the text
		number = match.group(1)
		seen[number] = seen.get(number, 0) + 1
		item_id = f'ii-{ number }-{ seen[number] }'
		return f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>'

	# Find all references in the text to the item index
	new_html = re.sub(r'Item (\d\d\d)', link_reference, html)

	# Also add a <span> around the index nr to style it.
	# Only the digits are captured: the <li> itself must not end up inside the span.
	new_html = re.sub(r'<li>(\d\d\d)', r'<li><span class="item_nr">\1</span>', new_html)

	return new_html

def clean_up(html):
	"""
		html = string (HTML)

		Returns the HTML without the wiki leftovers listed below.
		The substitutions are applied one after the other, in this order.
	"""
	substitutions = (
		(r'\[.*edit.*\]', ''), # the [edit] links
		(r'href="/book/index.php\?title=', 'href="#'), # internal wiki links become anchors
		(r'&#91;(?=\d)', ''), # left footnote bracket [ (only in front of a digit)
		(r'(?<=\d)&#93;', ''), # right footnote bracket ] (only after a digit)
	)
	for pattern, replacement in substitutions:
		html = re.sub(pattern, replacement, html)

	return html

def fast_loader(html):
	"""
		html = string (HTML)

		Returns the HTML unchanged, unless the module-level switch `fast`
		is on: then all '/images/' paths point to '/images-small/'.
	"""
	# explicit comparison: only True (or a value equal to it) turns fast mode on
	if not (fast == True):
		return html

	print('--- rendered in FAST mode ---')
	return html.replace('/images/', '/images-small/')

def parse_page(pagename, wiki):
	"""
		Request a page from the wiki (API action "parse") and prepare its HTML.

		pagename = string
		wiki = base URL of the wiki (string, without trailing slash)

		Returns html = string (HTML), after download_media(), clean_up(),
		add_item_inventory_links() and fast_loader() have been applied,
		or None when the API response has no 'parse' entry.
	"""
	from urllib.parse import urlencode

	# urlencode() escapes the page name, so that names with spaces, & or
	# non-ASCII characters do not break the request
	query = urlencode({
		'action': 'parse',
		'page': pagename,
		'pst': 'True',
		'format': 'json',
	})
	parse = f'{ wiki }/api.php?{ query }'
	data = API_request(parse, pagename)
	# print(json.dumps(data, indent=4))

	if 'parse' not in data:
		return None

	html = data['parse']['text']['*']
	images = data['parse']['images']
	html = download_media(html, images, wiki)
	html = clean_up(html)
	html = add_item_inventory_links(html)
	# html = insert_variable_geometry(html)
	html = fast_loader(html)

	return html

def save(html, pagename, publication_unfolded):
	"""
		Render the publication with the wrapping templates and save the result.

		html = string (HTML), only used as a switch: nothing is saved when it is empty or None
		pagename = string, the title given to the templates and the name of the saved files
		publication_unfolded = string (HTML), given to the templates

		Two files are written to STATIC_FOLDER_PATH:
			<pagename>.html          from template.html (final page, used with PagedJS)
			<pagename>.inspect.html  from template.inspect.html (extra page for debugging)
	"""
	if not html:
		return

	outputs = (
		('template.html', f'{ pagename }.html'),
		('template.inspect.html', f'{ pagename }.inspect.html'),
	)
	for template_name, output_name in outputs:
		# The files are opened in with-blocks, so they are closed right away.
		# UTF-8 is set explicitly: the text comes from the wiki and should not
		# depend on the default encoding of the machine.
		template_path = f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/{ template_name }'
		with open(template_path, encoding='utf-8') as template_file:
			template = jinja2.Template(template_file.read())
		rendered = template.render(publication_unfolded=publication_unfolded, title=pagename)

		html_file = f'{ STATIC_FOLDER_PATH }/{ output_name }'
		print('Saving HTML:', html_file)
		with open(html_file, 'w', encoding='utf-8') as out:
			out.write(rendered)

def update_material_now(pagename, wiki):
	"""
		pagename = string
		wiki = base URL of the wiki (string)

		Returns publication_unfolded = whatever parse_page() returns
		for this page: a string (HTML), or None.
	"""
	return parse_page(pagename, wiki)

# ---

if __name__ == "__main__":

	# base URL of the wiki (remove tail slash '/') and the page to publish
	wiki = 'https://possiblebodies.constantvzw.org/book'
	pagename = 'Unfolded'

	# download the latest version of the page ...
	publication_unfolded = update_material_now(pagename, wiki)
	# ... and save the page to file
	save(html=publication_unfolded, pagename=pagename, publication_unfolded=publication_unfolded)
here we go :) 3 years ago			`import urllib.request`
			`import os`
			`import re`
			`import json`
			`import jinja2`

syncing the update script between the web-interface and command line version 3 years ago			`STATIC_FOLDER_PATH = '.' # without trailing slash`
added a public static folder path for flask to not break the image links 3 years ago			`PUBLIC_STATIC_FOLDER_PATH = '.' # without trailing slash`
syncing the update script between the web-interface and command line version 3 years ago			`WRAPPING_TEMPLATES_DIR = './templates'`

added item inventory links + fast loader mode 3 years ago			`# This uses a low quality copy of all the images`
			`# (using a folder with the name "images-small",`
			`# which stores a copy of all the images generated with:`
			`# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 * )`
			`fast = False`

syncing the update script between the web-interface and command line version 3 years ago			`def API_request(url, pagename):`
here we go :) 3 years ago			`"""`
			`url = API request url (string)`
			`data = { 'query':`
			`'pages' :`
			`pageid : {`
			`'links' : {`
			`'?' : '?'`
			`'title' : 'pagename'`
			`}`
			`}`
			`}`
			`}`
			`"""`
			`response = urllib.request.urlopen(url).read()`
			`data = json.loads(response)`

			`# Save response as JSON to be able to inspect API call`
syncing the update script between the web-interface and command line version 3 years ago			`json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'`
here we go :) 3 years ago			`print('Saving JSON:', json_file)`
			`with open(json_file, 'w') as out:`
			`out.write(json.dumps(data, indent=4))`
			`out.close()`

			`return data`

syncing the update script between the web-interface and command line version 3 years ago			`def download_media(html, images, wiki):`
here we go :) 3 years ago			`"""`
			`html = string (HTML)`
			`images = list of filenames (str)`
			`"""`
			`# check if 'images/' already exists`
syncing the update script between the web-interface and command line version 3 years ago			`if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):`
			`os.makedirs(f'{ STATIC_FOLDER_PATH }/images')`
here we go :) 3 years ago
			`# download media files`
			`for filename in images:`
			`filename = filename.replace(' ', '_') # safe filenames`

			`# check if the image is already downloaded`
			`# if not, then download the file`
syncing the update script between the web-interface and command line version 3 years ago			`if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'):`
here we go :) 3 years ago
			`# first we search for the full filename of the image`
			`url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json'`
			`response = urllib.request.urlopen(url).read()`
			`data = json.loads(response)`

			`# we select the first search result`
			`# (assuming that this is the image we are looking for)`
pick FIRST image from the search result list 3 years ago			`image = data['query']['allimages'][0]`
here we go :) 3 years ago
			`# then we download the image`
			`image_url = image['url']`
			`image_filename = image['name']`
			`print('Downloading:', image_filename)`
			`image_response = urllib.request.urlopen(image_url).read()`

			`# and we save it as a file`
syncing the update script between the web-interface and command line version 3 years ago			`image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }'`
here we go :) 3 years ago			`out = open(image_path, 'wb')`
			`out.write(image_response)`
			`out.close()`

			`import time`
			`time.sleep(3) # do not overload the server`

			`# replace src link`
added a public static folder path for flask to not break the image links 3 years ago			`image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file`
new regex, to avoid overriding Image01.png with 01.png 3 years ago			`matches = re.findall(rf'src="/book/images/.*?px-{ filename }"', html) # for debugging`
			`if matches:`
			`html = re.sub(rf'src="/book/images/.*?px-{ filename }"', f'src="{ image_path }"', html)`
			`else:`
			`matches = re.findall(rf'src="/book/images/.*?{ filename }"', html) # for debugging`
			`html = re.sub(rf'src="/book/images/.*?{ filename }"', f'src="{ image_path }"', html)`
			`# print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!`
here we go :) 3 years ago
			`return html`

pushing updates 3 years ago			`def insert_variable_geometry(html):`
			`vg = """`
			`<script>`
			`/Sketch.js from the Chapter Variable Geometry in Aesthetic Programming - A Handbook of Software Studies, by Winnie Soon & Geoff Cox (2020) - http://aesthetic-programming.net//`

			`/Inspired by David Reinfurt's work - Multi/`
			`let moving_size = 50;`
			`let static_size = 20;`

			`function setup() {`
			`createCanvas(windowWidth, windowHeight);`
			`frameRate(15);`
			`}`

			`function draw() {`
			`//background`
			`background(230);`
			`//left`
			`noStroke()`
			`fill(0);`
			`rect(97, 169, 79, 12);`

			`//right`
			`rect(365, 184, 20, 15);`
			`fill(20, 20, 120);`

			`beginShape();`
			`vertex(365, 199);`
			`vertex(385, 199);`
			`vertex(372, 216);`
			`vertex(358, 216);`
			`endShape(CLOSE);`

			`//bottom`
			`noFill();`
			`stroke(130);`
			`strokeWeight(2);`
			`ellipse(255, 350, static_size, static_size);`

			`//mouse interactions`
			`stroke(180);`
			`ellipse(mouseX, mouseY, moving_size, moving_size);`

			`if (mouseIsPressed) {`
			`static_size = floor(random(5, 20));`
			`}`
			`}`
			`</script>"""`
			`html = html.replace("$multi", vg)`
			`return html`

			`def add_item_inventory_links(html):`
started with a splitter for h2 headers 3 years ago			`"""`
			`html = string (HTML)`
			`"""`
added item inventory links + fast loader mode 3 years ago			`# Find all references in the text to the item index`
pushing updates 3 years ago			`pattern = r'Item \d\d\d'`
			`matches = re.findall(pattern, html)`
			`index = {}`
added item inventory links + fast loader mode 3 years ago			`new_html = ''`
			`from nltk.tokenize import sent_tokenize`
			`for line in sent_tokenize(html):`
			`for match in matches:`
			`if match in line:`
			`number = match.replace('Item ', '').strip()`
			`if not number in index:`
			`index[number] = []`
			`count = 1`
			`else:`
			`count = index[number][-1] + 1`
			`index[number].append(count)`
			`item_id = f'ii-{ number }-{ index[number][-1] }'`
			`line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')`

			`# the line is pushed back to the new_html`
			`new_html += line + ' '`

			`# Also add a <span> around the index nr to style it`
			`matches = re.findall(r'<li>\d\d\d', new_html)`
pushing updates 3 years ago			`for match in matches:`
added item inventory links + fast loader mode 3 years ago			`new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')`

			`# import json`
			`# print(json.dumps(index, indent=4))`

			`return new_html`
started with a splitter for h2 headers 3 years ago
here we go :) 3 years ago			`def clean_up(html):`
			`"""`
			`html = string (HTML)`
			`"""`
			`html = re.sub(r'\[.edit.\]', '', html) # remove the [edit]`
syncing the update script between the web-interface and command line version 3 years ago			`html = re.sub(r'href="/book/index.php\?title=', 'href="#', html) # remove the internal wiki links`
adding a regex to remove the [] from footnotes 3 years ago			`html = re.sub(r'[(?=\d)', '', html) # remove left footnote bracket [`
			`html = re.sub(r'(?<=\d)]', '', html) # remove right footnote bracket ]`
here we go :) 3 years ago			`return html`

added item inventory links + fast loader mode 3 years ago			`def fast_loader(html):`
			`"""`
			`html = string (HTML)`
			`"""`
			`if fast == True:`
			`html = html.replace('/images/', '/images-small/')`
			`print('--- rendered in FAST mode ---')`

			`return html`

syncing the update script between the web-interface and command line version 3 years ago			`def parse_page(pagename, wiki):`
here we go :) 3 years ago			`"""`
			`pagename = string`
			`html = string (HTML)`
			`"""`
			`parse = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'`
syncing the update script between the web-interface and command line version 3 years ago			`data = API_request(parse, pagename)`
here we go :) 3 years ago			`# print(json.dumps(data, indent=4))`
			`if 'parse' in data:`
			`html = data['parse']['text']['*']`
			`images = data['parse']['images']`
syncing the update script between the web-interface and command line version 3 years ago			`html = download_media(html, images, wiki)`
here we go :) 3 years ago			`html = clean_up(html)`
pushing updates 3 years ago			`html = add_item_inventory_links(html)`
			`# html = insert_variable_geometry(html)`
added item inventory links + fast loader mode 3 years ago			`html = fast_loader(html)`
here we go :) 3 years ago			`else:`
			`html = None`

			`return html`

syncing the update script between the web-interface and command line version 3 years ago			`def save(html, pagename, publication_unfolded):`
here we go :) 3 years ago			`"""`
			`html = string (HTML)`
			`pagename = string`
			`"""`
			`if html:`

			`# save final page that will be used with PagedJS`
syncing the update script between the web-interface and command line version 3 years ago			`template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.html').read()`
here we go :) 3 years ago			`template = jinja2.Template(template_file)`
			`html = template.render(publication_unfolded=publication_unfolded, title=pagename)`

syncing the update script between the web-interface and command line version 3 years ago			`html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.html'`
here we go :) 3 years ago			`print('Saving HTML:', html_file)`
			`with open(html_file, 'w') as out:`
			`out.write(html)`
			`out.close()`

			`# save extra html page for debugging`
syncing the update script between the web-interface and command line version 3 years ago			`template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.inspect.html').read()`
here we go :) 3 years ago			`template = jinja2.Template(template_file)`
			`html = template.render(publication_unfolded=publication_unfolded, title=pagename)`

syncing the update script between the web-interface and command line version 3 years ago			`html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html'`
here we go :) 3 years ago			`print('Saving HTML:', html_file)`
			`with open(html_file, 'w') as out:`
			`out.write(html)`
			`out.close()`

syncing the update script between the web-interface and command line version 3 years ago			`def update_material_now(pagename, wiki):`
here we go :) 3 years ago			`"""`
			`pagename = string`
			`publication_unfolded = string (HTML)`
			`"""`
syncing the update script between the web-interface and command line version 3 years ago			`publication_unfolded = parse_page(pagename, wiki)`
here we go :) 3 years ago
			`return publication_unfolded`

			`# ---`

syncing the update script between the web-interface and command line version 3 years ago			`if __name__ == "__main__":`
here we go :) 3 years ago
syncing the update script between the web-interface and command line version 3 years ago			`wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'`
			`pagename = 'Unfolded'`

			`publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page`
			`save(publication_unfolded, pagename, publication_unfolded) # save the page to file`
here we go :) 3 years ago