import urllib.request
import os
import re
import json
import jinja2

# Notes are here: https://pad.vvvvvvaria.org/volumetric-regimes-in-process.

STATIC_FOLDER_PATH = '.' # folder in which the JSON, the HTML and the images/ folder are written (without trailing slash)
PUBLIC_STATIC_FOLDER_PATH = '.' # prefix of the image links that are written into the HTML (without trailing slash)
TEMPLATES_DIR = './templates' # looked up inside STATIC_FOLDER_PATH, holds template.html and template.inspect.html

# FAST mode: when set to True, fast_loader() points every '/images/' path
# to '/images-small/', a folder with a low quality copy of all the images.
# That copy can be generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 *
fast = False

def API_request(url, pagename):
	"""
		Call the wiki API and keep a copy of the answer on disk.

		url = API request url (string)
		pagename = string, used as the name of the saved JSON file

		Returns the decoded JSON answer (dict).
		The same data is saved as { STATIC_FOLDER_PATH }/{ pagename }.json
		to be able to inspect the API call afterwards.
	"""
	# the with-statement closes the HTTP connection, also when reading fails
	with urllib.request.urlopen(url) as response:
		data = json.loads(response.read())

	# Save response as JSON to be able to inspect API call
	json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
	print('Saving JSON:', json_file)
	with open(json_file, 'w') as out:
		out.write(json.dumps(data, indent=4))

	return data

def download_media(html, images, wiki):
	"""
		Point the image links in the HTML to the local images/ folder.

		html = string (HTML)
		images = list of filenames (str); the list itself is left untouched
		wiki = base url of the wiki (string), only referenced by the
		       download step, which is switched off at the moment

		Returns the HTML (string) with the image paths replaced.
	"""
	# make sure 'images/' exists
	os.makedirs(f'{ STATIC_FOLDER_PATH }/images', exist_ok=True)

	# paths that are already rewritten, to not replace them a second time
	replaced = set()

	# Walk through a reverse sorted copy of the list, to make sure that
	# 01.png does not override Image01.png in the filename replacements later.
	for filename in sorted(images, reverse=True):
		filename = filename.replace(' ', '_') # safe filenames

		# check if the image is already downloaded
		# if not, then download the file
		# !!!!!
		# turned off for preparing final files (AUG 2022)
		# !!!!!
		# if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'):

		# 	# first we search for the full filename of the image
		# 	url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json'
		# 	response = urllib.request.urlopen(url).read()
		# 	data = json.loads(response)

		# 	# we select the first search result
		# 	# (assuming that this is the image we are looking for)
		# 	image = data['query']['allimages'][0]

		# 	# then we download the image
		# 	image_url = image['url']
		# 	image_filename = image['name']
		# 	print('Downloading:', image_filename)
		# 	image_response = urllib.request.urlopen(image_url).read()

		# 	# and we save it as a file
		# 	image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }'
		# 	out = open(image_path, 'wb')
		# 	out.write(image_response)
		# 	out.close()

		# 	import time
		# 	time.sleep(3) # do not overload the server

		# replace src image link (from wiki folder structure to local folder)
		image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file

		# The filename is text, not a regex: escape it, so a '.' only matches
		# a dot and characters like '(' or '+' cannot break the pattern
		# (or turn the findall() results into group fragments).
		escaped = re.escape(filename)

		# thumbnails first ("...px-filename"), then the plain path;
		# (?<!\.) skips the "./images/..." paths that are already local
		img_path_patterns = [rf'(?<!\.)/images/.*?px-{ escaped }', rf'(?<!\.)/images/.*?{ escaped }']
		for img_path_pattern in img_path_patterns:
			for match in re.findall(img_path_pattern, html):
				if match not in replaced:
					# print(f'    { match } --> { image_path }') # for debugging: each image should have the correct match!
					html = html.replace(match, image_path)
					replaced.add(match)

	return html

def add_item_inventory_links(html):
	"""
		Link each mentioning of "Item ###" to the Item Index.

		html = string (HTML)

		Returns the HTML (string) in which:
		- each "Item ###" that is found is wrapped in
		  <a id="ii-###-n" href="#Item_Index">, where n counts the
		  mentionings of that item (1, 2, 3, ...), so every id is unique;
		- each index number that directly follows a <li> is wrapped in
		  <span class="item_nr">, to be able to style it.
	"""
	# THROUGHOUT THE BOOK
	# how many times each item number is linked so far
	counts = {}

	def _link(found):
		# the three groups are: the text before, the ### and the text after
		text_before, number, text_after = found.groups()
		counts[number] = counts.get(number, 0) + 1
		item_id = f'ii-{ number }-{ counts[number] }'
		return f'{ text_before }<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text_after }'

	# Same (dodgy) search as before: a word character, the first "Item ###"
	# after it on the same line, and the first three word characters behind it.
	# The replacement is done in one pass, exactly at the place of each match,
	# so two identical snippets do not end up with the same id anymore.
	# Known limit of the pattern: an "Item ###" that directly follows another
	# match (without a word character in between) is not linked.
	html = re.sub(r'(\w.*?)Item (\d\d\d)(.*?\w\w\w)', _link, html)

	# IN THE ITEM INDEX
	# Also add a <span> around the index nr to style it
	# (only around the number, the <li> itself stays where it is)
	html = re.sub(r'<li>(\d\d\d)', r'<li><span class="item_nr">\1</span>', html)

	return html

def tweaking(html):
	"""
		Apply the hand-made, publication-specific corrections to the HTML.

		html = string (HTML)

		Returns the tweaked HTML (string).

		All statements work on the same string, from top to bottom, and
		some of them look for markup that is inserted further up (pattern5
		expects the <br> of the Somatopologies title, the last replacement
		expects the <span class="star"> around the star). So the order of
		the statements matters.
	"""
	# --- anchor links in the TOC
	html = html.replace('<a href="#X,_y,_z_(4_filmstills)"', '<a href="#x,_y,_z_(4_filmstills)"') # change the anchor link in the TOC to lowercase
	html = html.replace('<a href="#Rehearsal_as_the_%E2%80%98Other%E2%80%99_to_Hypercomputation"', '<a href="#Rehearsal_as_the_‘Other’_to_Hypercomputation"') # use the literal quotes in the anchor link, instead of the percent-encoded ones
	html = html.replace('<a href="#We_hardly_encounter_anything_that_didn%E2%80%99t_really_matter"', '<a href="#We_hardly_encounter_anything_that_didn’t_really_matter"') # use the literal quote in the anchor link, instead of the percent-encoded one
	html = re.sub(r'''<h3><span class="mw-headline" id="References.*?">References</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h3>
<ul>''', '''<h3 class="references"><span class="mw-headline" id="References">References</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h3>
<ul class="references">''', html) # add class="references" to h3 and ul (and reset the h3 id to "References"), so the elements can be selected with CSS

	# --- images that are swapped for a local svg version
	html = html.replace('src="./images/Userinfo.jpg"', 'src="./images/Userinfo.svg"') # This image is not on the wiki
	html = html.replace('srcset="./images/Userinfo.jpg 1.5x, ./images/Userinfo.jpg 2x"', 'srcset="./images/Userinfo.svg 1.5x, ./images/Userinfo.svg 2x"') # This image is not on the wiki
	html = html.replace('src="./images/Continuum_brighton.png"', 'src="./images/Continuum_brighton.svg"') # This image is not on the wiki
	html = html.replace('srcset="./images/Continuum_brighton.png 1.5x, ./images/Continuum_brighton.png 2x"', 'srcset="./images/Continuum_brighton.svg 1.5x, ./images/Continuum_brighton.svg 2x"') # This image is not on the wiki
	# (the same src/srcset swap to .svg for Topology-typography-1A, -1B, -2A and -2B is switched off)

	# --- changing stars: * becomes ✶
	html = html.replace('trans*feminis', 'trans✶feminis') # changing stars
	html = html.replace('Trans*feminis', 'Trans✶feminis') # changing stars
	html = html.replace('star (*)', 'star (✶)') # changing stars
	# NOTE(review): the next line cannot match anything anymore, because every
	# 'trans*feminis' is already changed by the first replacement of this group.
	html = html.replace('Our trans*feminist lens is sharpened by queer and anti-colonial sensibilities, and oriented towards (but not limited to) trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical forms of study.', 'Our trans✶feminist lens is sharpened by queer and anti-colonial sensibilities, and oriented towards (but not limited to) trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical forms of study.') # changing stars

	# --- forced line breaks (<br>) in the titles
	html = html.replace('<h2><span class="mw-headline" id="Invasive_imagination_and_its_agential_cuts">Invasive imagination and its agential cuts</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Invasive_imagination_and_its_agential_cuts">Invasive imagination <br>and its agential cuts</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
	html = html.replace('<h2><span class="mw-headline" id="Volumetric_Regimes:_Material_cultures_of_quantified_presence">Volumetric Regimes: Material cultures of quantified presence</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Volumetric_Regimes:_Material_cultures_of_quantified_presence">Volumetric Regimes:<br>Material cultures of<br>quantified presence</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
	html = html.replace('<h2><span id="Somatopologies_(materials_for_a_movie_in_the_making)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies (materials for a movie in the making)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span id="Somatopologies_(materials_for_a_movie_in_the_making)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies (materials<br> for a movie in the making)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
	html = html.replace('<h1><span class="mw-headline" id="Signs_of_clandestine_disorder:_The_continuous_aftermath_of_3D-computationalism"><a href="#Clandestine_disorder" title="Clandestine disorder">Signs of clandestine disorder: The continuous aftermath of 3D-computationalism</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Signs_of_clandestine_disorder:_The_continuous_aftermath_of_3D-computationalism"><a href="#Clandestine_disorder" title="Clandestine disorder">Signs of clandestine disorder:<br>The continuous<br>aftermath of 3D-<br>computationalism</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
	html = html.replace('<h2><span class="mw-headline" id="The_Industrial_Continuum_of_3D">The Industrial Continuum of 3D</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="The_Industrial_Continuum_of_3D">The Industrial Continuum <br>of 3D</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
	html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
	html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')

	# --- the stars that are left, and the <span> around each star
	html = html.replace('T*fRP', 'T✶fRP')
	html = html.replace('trans*', 'trans✶')
	# NOTE(review): this turns the capital T of 'Trans*' into a lowercase 't'
	# ('Trans*feminis' is already handled above) -- confirm that this is wanted.
	html = html.replace('Trans*', 'trans✶')
	html = html.replace('(*)', '(✶)')
	html = html.replace('✶', '<span class="star">✶</span>') # wrap each star, so it can be selected with CSS
	html = html.replace('<p><a href="#File', '<p class="image"><a href="#File') # give <p>'s that contain an non-thumb image a .image class
	html = html.replace(' – ', '<span class="endash"> – </span>') # control the white spaces around an endash
	html = html.replace(' — so we do!”', ' — so<br>we do!”') # force line break
	html = html.replace('I find gestationality useful and very exciting.', 'I find gestationality useful and very<br>exciting.') # force line break
	html = html.replace('world.html https://docs.blender.org/manual/en/dev/rende', 'world.html<br>https://docs.blender.org/manual/en/dev/rende') # force line break
	html = html.replace('Nerea Calvillo, Eric Snodgrass', 'Nerea Calvillo, Eric <br>Snodgrass') # force line break

	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
	# Missing sentences hack zone........
	# The first two replacements belong together: the first one opens a
	# <div class="no-text-indent"> after a page break, the second one closes a </div>.
	html = html.replace('from on-line hosting, designing, peer-reviewing', 'from on-line hosting, designing, </p><div class="page-break"></div><div class="no-text-indent"><p>peer-reviewing')
	html = html.replace('''revolving of all matters.
</p>''', '''revolving of all matters.</p></div>
''')
	html = html.replace('in an efficient manner, combining positivist science', 'in an efficient manner,<br />combining positivist science')

	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	html = html.replace('src="./images/Barcode.png"', 'src="./cover/Barcode.svg"') # This image is not on the wiki
	html = html.replace('src="./images/OHP-logo-title.png"', 'src="./cover/OHP-logo-title.svg"') # This image is not on the wiki

	# --- title wrappers
	# Seven patterns that describe how a title block can look like.
	# NOTE(review): pattern1 uses the greedy .* where the other patterns use .*?
	pattern1 = r'''<h2><span class="mw-headline" id=".*">.*</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>.*?</b>
</p>''' # title + author 
	pattern2 = r'''<h2><span class="mw-headline" id=".*?">.*?</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<h2 style="display:none;"><span class="mw-headline" id=".*?">.*?</span></h2>
<p><b>.*?</b>
</p>''' # exceptions: custom running headers
	pattern3 = r'''<h2><span class="mw-headline" id=".*?">.*?</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>''' # only title
	pattern4 = r'''<h2><span id="x,_y,_z_\(4_filmstills\)"></span><span class="mw-headline" id="x.2C_y.2C_z_.284_filmstills.29">x, y, z \(4 filmstills\)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Jara Rocha, Femke Snelting</b>
</p>''' # title with an extra <span id=""> in front of the headline
	pattern5 = r'''<h2><span id="Somatopologies_\(materials_for_a_movie_in_the_making\)"></span><span class="mw-headline" id="Somatopologies_.28materials_for_a_movie_in_the_making.29">Somatopologies \(materials<br> for a movie in the making\)</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Possible Bodies \(Jara Rocha, Femke Snelting\)</b>
</p>''' # includes the <br> that is added to this title above
	pattern6 = r'''<h2><span id="Rehearsal_as_the_\‘Other\’_to_Hypercomputation"></span><span class="mw-headline" id="Rehearsal_as_the_\.E2\.80\.98Other\.E2\.80\.99_to_Hypercomputation">Rehearsal as the \‘Other\’ to Hypercomputation</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Maria Dada</b>
</p>''' # title with an extra <span id=""> in front of the headline
	pattern7 = r'''<h2><span id="We_hardly_encounter_anything_that_didn’t_really_matter"></span><span class="mw-headline" id="We_hardly_encounter_anything_that_didn\.E2\.80\.99t_really_matter">We hardly encounter anything that didn’t really matter</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>
<p><b>Phil Langley in conversation with Possible Bodies</b>
</p>''' # title with an extra <span id=""> in front of the headline

	# The patterns have no groups, so findall() returns the full text of each match.
	# Each match is wrapped in a <div class="title-wrapper">; replace() changes all
	# places in the HTML where that exact text appears.
	results = re.findall(rf'{pattern1}|{pattern2}|{pattern3}|{pattern4}|{pattern5}|{pattern6}|{pattern7}', html)
	for match in results:
		html = html.replace(match, f'<div class="title-wrapper">{ match }</div>')

	# # add a "word break opportunity" after each hyphen in compound words, but avoid url's and class names
	# pattern = r'(?!\b[/|"])([a-z][a-z][a-z][-])' 
	# results = re.findall(rf'{ pattern }', html)
	# for match in results:
	# 	print(match)
	# 	html = html.replace(match, f'{ match }<wbr>')

	# html = html.replace('.png', '.jpg') # Using only jpg version of the images in the BW version, June 2022
	# html = html.replace('.gif', '.jpg') # Using only jpg version of the images in the BW version, June 2022
	html = html.replace('Topology-typography-1A.jpg', 'Topology-typography-1A.png') # Using png's for Spec, Aug 2022
	html = html.replace('Topology-typography-1B.jpg', 'Topology-typography-1B.png') # Using png's for Spec, Aug 2022
	html = html.replace('Topology-typography-2A.jpg', 'Topology-typography-2A.png') # Using png's for Spec, Aug 2022
	html = html.replace('Topology-typography-2B.jpg', 'Topology-typography-2B.png') # Using png's for Spec, Aug 2022

	html = html.replace('sky is black <br />and the ground is yellow.<br /><br />', 'sky is black <br />and the ground is yellow.<br />') # Aug 2022, remove the double <br />

	html = html.replace('<div class="contribution 2.5d-romance">', '<div class="contribution romance">') # Aug 2022, rename the class

	# Hand-made hyphenation and line breaks for two paragraphs.
	# The text to search for contains the <span class="star"> that is added above.
	html = html.replace(
		'''</p><p>3D computation has historically co-evolved with Modern technosciences, and aligned with the regimes of optimization, normalization and hegemonic world order. The legacies and projections of industrial development leave traces of that imaginary and tell the stories of a lively tension between “the probable” and “the possible”. Defined as the techniques for measuring volumes, volumetrics all too easily (re)produce and accentuate the probable, and this process is intensified within the technocratic realm of contemporary hyper-computation.
</p><p>This book brings together diverse materials from an ongoing trans<span class="star">✶</span>feminist conversation between artists, software developers and theorists working with techniques and technologies for detecting, tracking, capturing, printing, modeling and rendering volumes.''', 
		'''</p><p>3D computation has historically co-evolved with Modern technosciences, and aligned with the regimes of optimiza-<br>tion, normalization and hegemonic world order. The lega-<br>cies and projections of industrial development leave traces of that imaginary and tell the stories of a lively tension <br>between “the probable” and “the possible”. Defined as the techniques for measuring volumes, volumetrics all too easily (re)produce and accentuate the probable, and this process is intensified within the technocratic realm of <br>contemporary hyper-computation.
</p><p>This book brings together diverse materials from an ongoing trans<span class="star">✶</span>feminist conversation between artists, software developers and theorists working with <br>techniques and technologies for detecting, tracking, capturing, printing, modeling and rendering volumes.''') # Aug 2022

	return html

def clean_up(html):
	"""
		Strip the wiki interface leftovers from the HTML.

		html = string (HTML)

		Returns the cleaned HTML (string).
	"""
	# (what to search for, what to put in its place), applied in this order
	substitutions = (
		(r'\[.*edit.*\]', ''), # the [edit] links; greedy: from the first [ to the last ] on a line with "edit" in between
		(r'href="/index.php\?title=', 'href="#'), # internal wiki links become anchor links
		(r'&#91;(?=\d)', ''), # left footnote bracket [ in front of a number
		(r'(?<=\d)&#93;', ''), # right footnote bracket ] behind a number
	)
	for search_for, put_back in substitutions:
		html = re.sub(search_for, put_back, html)

	return html

def fast_loader(html):
	"""
		Switch to the low quality images when FAST mode is on.

		html = string (HTML)

		Returns the HTML (string): untouched when the module setting
		`fast` is not True, otherwise with each '/images/' changed
		into '/images-small/'.
	"""
	# FAST mode is off: nothing to do
	if fast != True:
		return html

	small_images_html = html.replace('/images/', '/images-small/')
	print('--- rendered in FAST mode ---')
	return small_images_html

def parse_page(pagename, wiki):
	"""
		Ask the wiki for the HTML of a page and prepare it for the book.

		pagename = string
		wiki = base url of the wiki (string), without trailing slash

		Returns the processed HTML (string),
		or None when the API answer has no 'parse' part.
	"""
	request_url = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
	answer = API_request(request_url, pagename)
	# print(json.dumps(answer, indent=4))

	# no parsed page in the answer: nothing to work with
	if 'parse' not in answer:
		return None

	parsed = answer['parse']
	html = download_media(parsed['text']['*'], parsed['images'], wiki)

	# the remaining steps all take the HTML and give it back, in this order
	for step in (clean_up, add_item_inventory_links, tweaking, fast_loader):
		html = step(html)

	return html

def _render_to_file(template_name, html_file, html, pagename):
	"""
		Render the HTML into one of the templates and save the result.

		template_name = filename inside the templates folder (string)
		html_file = path of the file to write (string)
		html = string (HTML)
		pagename = string, passed to the template as title
	"""
	template_path = f'{ STATIC_FOLDER_PATH }/{ TEMPLATES_DIR }/{ template_name }'
	# the with-statement closes the template file again after reading
	with open(template_path, encoding='utf-8') as template_file:
		template = jinja2.Template(template_file.read())
	doc = template.render(publication_unfolded=html, title=pagename)

	print('Saving HTML:', html_file)
	# The HTML is full of non-ASCII characters (✶, ‘ ’, –), so the encoding
	# is set here and does not depend on the settings of the computer.
	with open(html_file, 'w', encoding='utf-8') as out:
		out.write(doc)

def save(html, pagename):
	"""
		Save the HTML to file.

		html = string (HTML)
		pagename = string

		From the command-line, two pages are made with the templates:
		{ pagename }.html (the final page that will be used with PagedJS)
		and { pagename }.inspect.html (extra page for debugging).
		In the Flask application, only the HTML itself is saved
		as Unfolded.html (without <head>).

		NOTE(review): parse_page() gives back None when the API answer
		has no 'parse' part; this function does not check for that.
	"""
	if __name__ == "__main__":
		# command-line

		# save final page that will be used with PagedJS
		_render_to_file('template.html', f'{ STATIC_FOLDER_PATH }/{ pagename }.html', html, pagename)

		# save extra html page for debugging (CLI only)
		_render_to_file('template.inspect.html', f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html', html, pagename)

	else:
		# Flask application 

		with open(f'{ STATIC_FOLDER_PATH }/Unfolded.html', 'w', encoding='utf-8') as out:
			out.write(html) # save the html to a file (without <head>)

def update_material_now(pagename, wiki):
	"""
		Download and process the latest version of a page.

		pagename = string
		wiki = base url of the wiki (string)

		Returns whatever parse_page() returns:
		the HTML (string), or None when there is no parsed page.
	"""
	return parse_page(pagename, wiki)

# ---

if __name__ == "__main__":

	# command-line: make the book from the 'Unfolded' page of the wiki
	wiki = 'https://volumetricregimes.xyz' # without trailing slash '/'
	pagename = 'Unfolded'

	# download the latest version of the page, and save it to file
	save(update_material_now(pagename, wiki), pagename)