volumetric-regimes-book/command-line/update.py


								import urllib.request

								import os

								import re

								import json

								import jinja2


								def API_request(url):
									"""
									Fetch a wiki API url, decode the JSON answer and keep a copy on disk.

									url = API request url (string)

									Returns the decoded JSON response. Its layout depends on the API
									action that was requested.

									Side effect: the response is also written, pretty-printed, to
									'<pagename>.json' so the API call can be inspected afterwards.
									NOTE: `pagename` is the module-level variable, not an argument
									of this function.
									"""
									# the context manager closes the HTTP connection once the body is read
									with urllib.request.urlopen(url) as response:
										data = json.loads(response.read())

									# Save response as JSON to be able to inspect API call
									json_file = f'{ pagename }.json'
									print('Saving JSON:', json_file)
									with open(json_file, 'w') as out:
										json.dump(data, out, indent=4)

									return data


								def download_media(html, images):
									"""
									Download the images of a page and point the HTML to the local copies.

									html = string (HTML)
									images = list of filenames (str)

									For every filename that is not present in the local 'images/'
									folder yet, the wiki API is asked for the image (list=allimages)
									and the first search result is downloaded into 'images/'.
									After that, every src="/book/images/...<filename>" attribute in
									the HTML is rewritten to src="images/<filename>".

									Returns the HTML string with the rewritten src attributes.
									"""
									import time
									import urllib.parse

									# make sure 'images/' exists
									os.makedirs('images', exist_ok=True)

									# download media files
									for filename in images:
										filename = filename.replace(' ', '_') # safe filenames
										image_path = f'images/{ filename }'

										# check if the image is already downloaded
										# if not, then download the file
										if not os.path.isfile(image_path):

											# first we search for the full filename of the image
											# (percent-encoded, so characters like '&' or non-ASCII
											# letters in the name cannot break the request url)
											query = urllib.parse.quote(filename, safe='')
											url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ query }&format=json'
											with urllib.request.urlopen(url) as response:
												data = json.loads(response.read())

											# an empty result list used to raise an IndexError here
											results = data.get('query', {}).get('allimages', [])
											if not results:
												print('No image found for:', filename)
												continue

											# we select the first search result
											# (assuming that this is the image we are looking for)
											image = results[0]

											# then we download the image
											image_filename = image['name']
											print('Downloading:', image_filename)
											with urllib.request.urlopen(image['url']) as image_response:
												image_data = image_response.read()

											# and we save it as a file
											with open(f'images/{ image_filename }', 'wb') as out:
												out.write(image_data)

											time.sleep(3) # do not overload the server

										# replace src link
										# - the filename is escaped, so '.', '(' or '+' in it are matched literally
										# - [^"]* keeps the match inside one src attribute, instead of
										#   running over other attributes on the same line
										# - the replacement is a function, so it is inserted verbatim
										pattern = rf'src="/book/images/[^"]*{ re.escape(filename) }"'
										html = re.sub(pattern, lambda match: f'src="{ image_path }"', html)

									return html


								def clean_up(html):
									"""
									Remove wiki interface leftovers from the parsed page.

									html = string (HTML)

									Returns the HTML string without the '[edit]' markers and with
									every link to '/book/index.php?title=...' replaced by href="#".
									"""
									# remove the [edit]
									# (greedy: on a line containing 'edit' between brackets, everything
									# from that opening '[' up to the last ']' of the line is removed)
									html = re.sub(r'\[.*edit.*\]', '', html)

									# remove the internal links
									# '.' and '?' are escaped to match them literally; unescaped, the
									# '?' made the preceding 'p' optional and the pattern never
									# matched an actual 'index.php?title=' link
									html = re.sub(r'href="/book/index\.php\?title=[^"]*"', 'href="#"', html)

									return html


								def parse_page(pagename):
									"""
									Ask the wiki to render a page and prepare the result for local use.

									pagename = string

									Returns the page as a HTML string, with the media downloaded and
									the markup cleaned up, or None when the API answer contains no
									'parse' entry.
									"""
									request_url = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
									answer = API_request(request_url)

									# nothing to work with when the API did not return a parsed page
									if 'parse' not in answer:
										return None

									parsed = answer['parse']
									markup = download_media(parsed['text']['*'], parsed['images'])
									return clean_up(markup)


								def save(html, pagename):
									"""
									Render the page through the jinja2 templates and write the results.

									html = string (HTML)
									pagename = string

									Two files are written:
									'<pagename>.html', rendered with 'template.html'
									(the final page that will be used with PagedJS), and
									'<pagename>.debug.html', rendered with 'template.debug.html'
									(an extra page for debugging).

									Nothing is written when html is empty or None.
									"""
									if not html:
										return

									outputs = (
										('template.html', f'{ pagename }.html'), # final page for PagedJS
										('template.debug.html', f'{ pagename }.debug.html'), # extra page for debugging
									)

									for template_path, html_file in outputs:
										with open(template_path) as template_file:
											template = jinja2.Template(template_file.read())

										# render the html that was handed to this function,
										# not the module-level variable of the same content
										rendered = template.render(publication_unfolded=html, title=pagename)

										print('Saving HTML:', html_file)
										with open(html_file, 'w') as out:
											out.write(rendered)


								def update_material_now(pagename):
									"""
									Fetch the current version of a wiki page.

									pagename = string

									Returns whatever parse_page() returns for this page.
									"""
									return parse_page(pagename)


								# ---


								# Base url of the wiki. The functions above append '/api.php?...' to it,
								# so it must be written without a slash '/' at the end.
								wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'

								# Title of the wiki page to fetch. It is also used for the names of the
								# files that are written: '<pagename>.json', '<pagename>.html' and
								# '<pagename>.debug.html'.
								pagename = 'Unfolded'


								# These two calls run as soon as the file is executed.
								publication_unfolded = update_material_now(pagename) # download the latest version of the page

								save(publication_unfolded, pagename) # save the page to file