volumetric-regimes-book/command-line/update.py

266 lines
7.1 KiB
Python
Raw Normal View History

2021-06-16 16:46:25 +02:00
import json
import os
import re
import time
import urllib.parse
import urllib.request

import jinja2
# Folder in which the JSON dumps, the rendered HTML files and the images/ folder are written.
STATIC_FOLDER_PATH = '.' # without trailing slash
# Folder (relative to STATIC_FOLDER_PATH) that holds the Jinja templates used by save().
WRAPPING_TEMPLATES_DIR = './templates'

# When set to True, fast_loader() points the HTML at a low quality copy of all the images
# (a folder with the name "images-small",
# which stores a copy of all the images generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 * )
fast = False
def API_request(url, pagename):
    """
    Call the wiki API and keep a copy of the response on disk.

    url      = API request url (string)
    pagename = string, used as the name of the saved JSON file

    Returns the decoded JSON response. Its shape depends on the API
    action that was requested; for a links query it looks roughly like:

        data = { 'query':
                    'pages' :
                        pageid : {
                            'links' : {
                                '?' : '?'
                                'title' : 'pagename'
                            }
                        }
                    }
               }
    """
    # The context manager closes the HTTP connection as soon as the body is read.
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read())

    # Save response as JSON to be able to inspect API call
    json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
    print('Saving JSON:', json_file)
    with open(json_file, 'w') as out:
        out.write(json.dumps(data, indent=4))

    return data
def download_media(html, images, wiki):
    """
    Download the images of a page and point the HTML at the local copies.

    html   = string (HTML)
    images = list of filenames (str)
    wiki   = base url of the wiki (string), without trailing slash

    Returns the HTML with the src attributes of the listed images rewritten.
    """
    # make sure 'images/' exists
    images_dir = f'{ STATIC_FOLDER_PATH }/images'
    os.makedirs(images_dir, exist_ok=True)

    for filename in images:
        filename = filename.replace(' ', '_') # safe filenames

        # only download the file when it is not stored locally yet
        if not os.path.isfile(f'{ images_dir }/{ filename }'):
            if not _download_image(filename, wiki, images_dir):
                # nothing was stored locally, so leave the wiki link untouched
                continue
            time.sleep(3) # do not overload the server

        # replace src link
        # NOTE: the leading '/' makes the link relative to the root of the domain,
        # which is what flask needs. The downside is that the saved HTML file
        # does not find its images anymore when it is opened as a local copy.
        image_path = f'/{ STATIC_FOLDER_PATH }/images/{ filename }'
        new_src = f'src="{ image_path }"'

        # The filename is escaped so characters such as '.' or '(' are matched
        # literally, and [^"]* keeps the match inside one src attribute
        # (a greedy .* could swallow several <img> tags on the same line).
        pattern = rf'src="/book/images/[^"]*{ re.escape(filename) }"'
        # A function is used as replacement so backslashes in the path are not
        # interpreted as regex escapes.
        html = re.sub(pattern, lambda _match, src=new_src: src, html)

    return html


def _download_image(filename, wiki, images_dir):
    """Look up one image through the wiki API and store it in images_dir; return True when a file was saved."""
    # first we search for the full filename of the image
    # (quoted, so spaces-free names with '&', '+' or non-ASCII characters still give a valid url)
    query = urllib.parse.quote(filename, safe='')
    url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ query }&format=json'
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read())

    results = data['query']['allimages']
    if not results:
        print('No image found for:', filename)
        return False

    # we select the first search result
    # (assuming that this is the image we are looking for)
    image = results[0]
    print(image)

    # then we download the image
    image_url = image['url']
    image_filename = image['name']
    print('Downloading:', image_filename)
    with urllib.request.urlopen(image_url) as image_response:
        image_bytes = image_response.read()

    # and we save it as a file
    image_path = f'{ images_dir }/{ image_filename }'
    with open(image_path, 'wb') as out:
        out.write(image_bytes)

    return True
2021-09-16 18:37:23 +02:00
def insert_variable_geometry(html):
    """
    Swap the "$multi" placeholder for an inline p5.js sketch.

    html = string (HTML)

    Returns the HTML in which every "$multi" is replaced by a <script> block;
    HTML without the placeholder is returned unchanged.
    """
    sketch_markup = """
<script>
/*Sketch.js from the Chapter Variable Geometry in Aesthetic Programming - A Handbook of Software Studies, by Winnie Soon & Geoff Cox (2020) - http://aesthetic-programming.net/*/
/*Inspired by David Reinfurt's work - Multi*/
let moving_size = 50;
let static_size = 20;
function setup() {
createCanvas(windowWidth, windowHeight);
frameRate(15);
}
function draw() {
//background
background(230);
//left
noStroke()
fill(0);
rect(97, 169, 79, 12);
//right
rect(365, 184, 20, 15);
fill(20, 20, 120);
beginShape();
vertex(365, 199);
vertex(385, 199);
vertex(372, 216);
vertex(358, 216);
endShape(CLOSE);
//bottom
noFill();
stroke(130);
strokeWeight(2);
ellipse(255, 350, static_size, static_size);
//mouse interactions
stroke(180);
ellipse(mouseX, mouseY, moving_size, moving_size);
if (mouseIsPressed) {
static_size = floor(random(5, 20));
}
}
</script>"""
    return html.replace("$multi", sketch_markup)
def add_item_inventory_links(html):
    """
    Link every "Item NNN" reference in the text to the item index.

    html = string (HTML)

    Each reference becomes
        Item <a id="ii-NNN-K" href="#Item_Index">NNN</a>
    where K counts the references to item NNN in reading order, so every
    anchor gets its own id (also when one sentence mentions an item twice).
    The three-digit number that opens a <li> is wrapped in
    <span class="item_nr"> so it can be styled.

    The text is split into sentences with nltk and joined again with a
    single space after every sentence.

    Returns the new HTML (string).
    """
    # imported inside the function, as in the rest of this file's optional steps
    from nltk.tokenize import sent_tokenize

    occurrences = {} # item number -> how many references were linked so far

    def link_item(match):
        number = match.group(1)
        occurrences[number] = occurrences.get(number, 0) + 1
        item_id = f'ii-{ number }-{ occurrences[number] }'
        return f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>'

    # Find all references in the text to the item index, sentence by sentence;
    # each sentence is pushed back to the new_html followed by a space
    new_html = ''.join(
        re.sub(r'Item (\d\d\d)', link_item, line) + ' '
        for line in sent_tokenize(html)
    )

    # Also add a <span> around the index nr to style it.
    # Only the number is wrapped: putting the matched '<li>' inside the span
    # as well would nest a second <li> in the list item.
    new_html = re.sub(r'<li>(\d\d\d)', r'<li><span class="item_nr">\1</span>', new_html)

    return new_html
2021-09-08 17:42:03 +02:00
2021-06-16 16:46:25 +02:00
def clean_up(html):
    """
    Strip wiki-specific leftovers from the rendered page.

    html = string (HTML)

    Returns the cleaned HTML (string).
    """
    substitutions = (
        (r'\[.*edit.*\]', ''), # remove the [edit]
        (r'href="/book/index.php\?title=', 'href="#'), # turn the internal wiki links into in-page anchors
        (r'&#91;(?=\d)', ''), # remove left footnote bracket [
        (r'(?<=\d)&#93;', ''), # remove right footnote bracket ]
    )
    # the substitutions are applied one after the other, in this order
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)
    return html
def fast_loader(html, use_small_images=None):
    """
    Point the image links at the low quality copies when fast mode is on.

    html             = string (HTML)
    use_small_images = optional switch for this call; when left at None
                       the module-level `fast` setting decides

    Returns the HTML; in fast mode every '/images/' is replaced by
    '/images-small/' and a notice is printed.
    """
    enabled = fast if use_small_images is None else use_small_images
    if not enabled:
        return html

    print('--- rendered in FAST mode ---')
    return html.replace('/images/', '/images-small/')
def parse_page(pagename, wiki):
    """
    Request the parsed version of a wiki page and prepare it for the book.

    pagename = string
    wiki     = base url of the wiki (string)

    Returns the processed HTML (string), or None when the API response
    has no 'parse' entry.
    """
    request_url = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
    data = API_request(request_url, pagename)
    # print(json.dumps(data, indent=4))

    if 'parse' not in data:
        return None

    parsed = data['parse']
    html = parsed['text']['*']
    images = parsed['images']
    html = download_media(html, images, wiki)

    # remaining steps, applied in this order
    # (insert_variable_geometry is currently left out)
    for step in (clean_up, add_item_inventory_links, fast_loader):
        html = step(html)
    return html
def save(html, pagename, publication_unfolded):
    """
    Render the page through the wrapping templates and write the results.

    html                 = string (HTML); only checked for being non-empty,
                           nothing is written when it is empty or None
    pagename             = string, used as the title and as the file name
    publication_unfolded = string (HTML), the content handed to the templates

    Two files are written in STATIC_FOLDER_PATH:
    <pagename>.html and <pagename>.inspect.html
    """
    if not html:
        return

    outputs = (
        ('template.html', 'html'), # final page that will be used with PagedJS
        ('template.inspect.html', 'inspect.html'), # extra html page for debugging
    )
    for template_name, extension in outputs:
        template_path = f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/{ template_name }'
        # utf-8 is set explicitly so reading and writing do not depend on the
        # locale of the machine; the template file is closed right after reading
        with open(template_path, encoding='utf-8') as template_file:
            template = jinja2.Template(template_file.read())
        rendered = template.render(publication_unfolded=publication_unfolded, title=pagename)

        html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.{ extension }'
        print('Saving HTML:', html_file)
        with open(html_file, 'w', encoding='utf-8') as out:
            out.write(rendered)
def update_material_now(pagename, wiki):
    """
    Fetch and process the current version of a wiki page.

    pagename = string
    wiki     = base url of the wiki (string)

    Returns publication_unfolded = string (HTML), exactly what parse_page() gives back.
    """
    return parse_page(pagename, wiki)
# ---
if __name__ == "__main__":
    # Script entry point: fetch one wiki page and write it out as HTML.
    wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'
    pagename = 'Unfolded'
    publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page
    # The HTML is passed twice: save() uses the first argument only to check that
    # there is something to save, and the third as the content for the templates.
    save(publication_unfolded, pagename, publication_unfolded) # save the page to file
2021-06-16 16:46:25 +02:00