# revision timestamp left by the extraction: 2021-06-16 16:46:25 +02:00
import urllib . request
import os
import re
import json
import jinja2
# revision timestamp left by the extraction: 2021-09-01 13:09:09 +02:00
# Folder in which the JSON dumps, the images and the rendered HTML pages
# are stored -- without trailing slash.
STATIC_FOLDER_PATH = '.'
# Folder with the wrapping Jinja templates; it is joined onto
# STATIC_FOLDER_PATH when the templates are opened.
WRAPPING_TEMPLATES_DIR = './templates'

# Fast mode uses a low quality copy of all the images
# (using a folder with the name "images-small",
# which stores a copy of all the images generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 * )
fast = False
def API_request(url, pagename):
    """
    Request an API url and return the decoded JSON response.

    url = API request url (string)
    pagename = string, used to name the JSON dump of the response
    data = { 'query':
                'pages' :
                    pageid : {
                        'links' : {
                            '?' : '?'
                            'title' : 'pagename'
                        }
                    }
                }
            }

    The response is also saved as STATIC_FOLDER_PATH/<pagename>.json.
    Errors raised by urllib, the JSON decoder or the file system are
    not caught here.
    """
    # the context manager closes the connection, also when reading fails
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read())

    # Save response as JSON to be able to inspect API call
    json_file = f'{STATIC_FOLDER_PATH}/{pagename}.json'
    print('Saving JSON:', json_file)
    with open(json_file, 'w') as out:
        out.write(json.dumps(data, indent=4))

    return data
def download_media(html, images, wiki):
    """
    Download the images of a page and point their src attributes to
    the local copies.

    html = string (HTML)
    images = list of filenames (str)
    wiki = base url of the wiki (string), without trailing slash

    Returns the HTML (string) with the rewritten src attributes.
    Images that are already present in STATIC_FOLDER_PATH/images are not
    downloaded again. An image for which the wiki returns no search
    result is reported and skipped, its src is left untouched.
    """
    import time
    from urllib.parse import quote

    # make sure 'images/' exists
    images_dir = f'{STATIC_FOLDER_PATH}/images'
    os.makedirs(images_dir, exist_ok=True)

    # download media files
    for filename in images:
        filename = filename.replace(' ', '_')  # safe filenames

        # check if the image is already downloaded
        # if not, then download the file
        if not os.path.isfile(f'{images_dir}/{filename}'):

            # first we search for the full filename of the image
            # (percent-encoded, so special characters in the name
            # cannot break the query string)
            url = f'{wiki}/api.php?action=query&list=allimages&aifrom={quote(filename)}&format=json'
            with urllib.request.urlopen(url) as response:
                data = json.loads(response.read())

            # we select the first search result
            # (assuming that this is the image we are looking for)
            results = data.get('query', {}).get('allimages', [])
            if not results:
                print('No image found for:', filename)
                continue
            image = results[0]

            # then we download the image
            print(image)
            image_url = image['url']
            image_filename = image['name']
            print('Downloading:', image_filename)
            with urllib.request.urlopen(image_url) as image_response:
                image_data = image_response.read()

            # and we save it as a file
            image_path = f'{images_dir}/{image_filename}'
            with open(image_path, 'wb') as out:
                out.write(image_data)

            time.sleep(3)  # do not overload the server

        # replace src link
        # here the images need to link to the / of the domain, for flask
        # (confusing: this breaks the idea of still being able to make a
        # local copy of the file)
        image_path = f'/{STATIC_FOLDER_PATH}/images/{filename}'
        # [^"]* keeps the match inside one src attribute; re.escape stops
        # characters such as '.' or '(' in the filename acting as regex
        src_pattern = rf'src="/book/images/[^"]*{re.escape(filename)}"'
        # a function as replacement: the path is inserted literally
        html = re.sub(src_pattern, lambda _match: f'src="{image_path}"', html)

    return html
def insert_variable_geometry(html):
    """
    Replace every $multi placeholder with an inline <script> sketch.

    html = string (HTML)

    Returns the HTML (string); it is returned unchanged when it does not
    contain the placeholder.
    """
    # The sketch defines setup() and draw(); the library that calls them
    # is not loaded here and has to be provided by the page.
    vg = """
<script>
/*Sketch.js from the Chapter Variable Geometry in Aesthetic Programming - A Handbook of Software Studies, by Winnie Soon & Geoff Cox (2020) - http://aesthetic-programming.net/ */
/*Inspired by David Reinfurt's work - Multi*/

let moving_size = 50;
let static_size = 20;

function setup() {
  createCanvas(windowWidth, windowHeight);
  frameRate(15);
}

function draw() {
  // background
  background(230);
  // left
  noStroke()
  fill(0);
  rect(97, 169, 79, 12);
  // right
  rect(365, 184, 20, 15);
  fill(20, 20, 120);
  beginShape();
  vertex(365, 199);
  vertex(385, 199);
  vertex(372, 216);
  vertex(358, 216);
  endShape(CLOSE);
  // bottom
  noFill();
  stroke(130);
  strokeWeight(2);
  ellipse(255, 350, static_size, static_size);
  // mouse interactions
  stroke(180);
  ellipse(mouseX, mouseY, moving_size, moving_size);

  if (mouseIsPressed) {
    static_size = floor(random(5, 20));
  }
}
</script>"""
    return html.replace("$multi", vg)
def add_item_inventory_links(html):
    """
    Link the "Item NNN" references in the text to the item index.

    html = string (HTML)

    Every reference is rewritten to
        Item <a id="ii-NNN-k" href="#Item_Index">NNN</a>
    in which k counts the sentences that mentioned item NNN so far.
    The three-digit number that directly follows a <li> is wrapped in
    <span class="item_nr">, to be able to style it.

    The text is split into sentences with nltk and joined again with
    single spaces, so the whitespace between sentences is not kept.
    """
    from nltk.tokenize import sent_tokenize

    # Find all references in the text to the item index
    # (every distinct reference once, in order of first appearance)
    references = list(dict.fromkeys(re.findall(r'Item \d\d\d', html)))
    mentions = {}  # item number -> number of sentences that mention it

    sentences = []
    for sentence in sent_tokenize(html):
        for reference in references:
            if reference in sentence:
                number = reference.replace('Item', '').strip()
                mentions[number] = mentions.get(number, 0) + 1
                item_id = f'ii-{number}-{mentions[number]}'
                sentence = sentence.replace(reference, f'Item <a id="{item_id}" href="#Item_Index">{number}</a>')
        # the sentence is pushed back, followed by a space
        sentences.append(sentence + ' ')
    new_html = ''.join(sentences)

    # Also add a <span> around the index nr to style it;
    # only the number is captured, so the <li> itself is not repeated
    # inside the span
    new_html = re.sub(r'<li>(\d\d\d)', r'<li><span class="item_nr">\1</span>', new_html)

    return new_html
def clean_up(html):
    """
    Remove wiki interface leftovers from the parsed page.

    html = string (HTML)

    Returns the cleaned HTML (string).
    """
    # remove the [edit]
    # (greedy: on a line that contains "edit", everything from the first
    # [ up to the last ] is removed)
    html = re.sub(r'\[.*edit.*\]', '', html)
    # remove the internal wiki links, they become anchors in the page
    html = re.sub(r'href="/book/index.php\?title=', 'href="#', html)
    # remove the footnote brackets around a number; the brackets are
    # matched both as plain characters and as the entities &#91; / &#93;
    # (a bare [ is not a valid pattern, it has to be escaped)
    html = re.sub(r'(?:\[|&#91;)(?=\d)', '', html)  # left footnote bracket [
    html = re.sub(r'(?<=\d)(?:\]|&#93;)', '', html)  # right footnote bracket ]
    return html
def fast_loader(html):
    """
    Point the image links to the low quality copies when the
    module-level switch `fast` is on.

    html = string (HTML)

    Returns the HTML (string), unchanged when `fast` is off.
    """
    if fast:
        html = html.replace('/images/', '/images-small/')
        print('--- rendered in FAST mode ---')
    return html
def parse_page(pagename, wiki):
    """
    Request the parsed HTML of a wiki page and run it through the
    processing steps of this file.

    pagename = string
    wiki = base url of the wiki (string), without trailing slash
    html = string (HTML), or None when the API response has no 'parse' key
    """
    from urllib.parse import quote

    # percent-encode the page name, so that spaces and special characters
    # give a valid url; '%' is left alone, so a name that is already
    # encoded is passed on unchanged
    page = quote(pagename, safe='/:%')
    parse = f'{wiki}/api.php?action=parse&page={page}&pst=True&format=json'
    data = API_request(parse, pagename)
    # print(json.dumps(data, indent=4))

    if 'parse' not in data:
        return None

    html = data['parse']['text']['*']
    images = data['parse']['images']
    html = download_media(html, images, wiki)
    html = clean_up(html)
    html = add_item_inventory_links(html)
    # html = insert_variable_geometry(html)
    html = fast_loader(html)
    return html
def save(html, pagename, publication_unfolded):
    """
    Render the page with the two wrapping templates and save the results.

    html = string (HTML); only checked: nothing is saved when it is
           empty or None
    pagename = string, used as title and as name of the output files
    publication_unfolded = string (HTML), handed to the templates

    Writes STATIC_FOLDER_PATH/<pagename>.html and
    STATIC_FOLDER_PATH/<pagename>.inspect.html.
    """
    if not html:
        return

    outputs = (
        # final page that will be used with PagedJS
        ('template.html', 'html'),
        # extra html page for debugging
        ('template.inspect.html', 'inspect.html'),
    )
    for template_name, extension in outputs:
        template_path = f'{STATIC_FOLDER_PATH}/{WRAPPING_TEMPLATES_DIR}/{template_name}'
        # the context manager closes the template file after reading
        with open(template_path) as template_file:
            template = jinja2.Template(template_file.read())
        rendered = template.render(publication_unfolded=publication_unfolded, title=pagename)

        html_file = f'{STATIC_FOLDER_PATH}/{pagename}.{extension}'
        print('Saving HTML:', html_file)
        with open(html_file, 'w') as out:
            out.write(rendered)
def update_material_now(pagename, wiki):
    """
    Get the current version of a page, by handing the request on to
    parse_page().

    pagename = string
    wiki = string
    publication_unfolded = string (HTML), as returned by parse_page()
    """
    return parse_page(pagename, wiki)
# ---

if __name__ == "__main__":

    # Only runs when this file is executed as a script:
    # it downloads one page of the wiki and saves it as HTML.
    wiki = 'https://possiblebodies.constantvzw.org/book'  # remove tail slash '/'
    pagename = 'Unfolded'

    publication_unfolded = update_material_now(pagename, wiki)  # download the latest version of the page
    save(publication_unfolded, pagename, publication_unfolded)  # save the page to file