From 2248dce8aa601f3c8e08b42f53ff5639028c7b6e Mon Sep 17 00:00:00 2001
From: manetta <mail@manettaberends.nl>
Date: Wed, 1 Sep 2021 13:09:09 +0200
Subject: [PATCH] syncing the update script between the web-interface and
 command line version

---
 command-line/template.html                    | 15 ------
 command-line/templates/template.html          | 15 ++++++
 .../template.inspect.html}                    |  2 +-
 command-line/update.py                        | 51 ++++++++++---------
 4 files changed, 44 insertions(+), 39 deletions(-)
 delete mode 100644 command-line/template.html
 create mode 100644 command-line/templates/template.html
 rename command-line/{template.debug.html => templates/template.inspect.html} (61%)
diff --git a/command-line/template.html b/command-line/template.html
deleted file mode 100644
index bee3d50..0000000
--- a/command-line/template.html
+++ /dev/null
@@ -1,15 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-	<meta charset="utf-8">
-	<script src="js/paged.js" type="text/javascript"></script>
-	<script src="js/paged.polyfill.js" type="text/javascript"></script>
-	<link href="css/interface.css" rel="stylesheet" type="text/css">
-	<link href="css/print.css" rel="stylesheet" type="text/css" media="print">
-</head>
-<body>
-	<div id="wrapper">
-		{{ publication_unfolded }}
-	</div>
-</body>
-</html>
\ No newline at end of file
diff --git a/command-line/templates/template.html b/command-line/templates/template.html
new file mode 100644
index 0000000..ca73739
--- /dev/null
+++ b/command-line/templates/template.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html>
+<html>
+<head>
+	<meta charset="utf-8">
+	<script src="./js/paged.js" type="text/javascript"></script>
+	<script src="./js/paged.polyfill.js" type="text/javascript"></script>
+	<link href="./css/interface.css" rel="stylesheet" type="text/css">
+	<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
+</head>
+<body>
+	<div id="wrapper">
+		{{ publication_unfolded }}
+	</div>
+</body>
+</html>
\ No newline at end of file
diff --git a/command-line/template.debug.html b/command-line/templates/template.inspect.html
similarity index 61%
rename from command-line/template.debug.html
rename to command-line/templates/template.inspect.html
index 59ce4bc..ddb1730 100644
--- a/command-line/template.debug.html
+++ b/command-line/templates/template.inspect.html
@@ -2,7 +2,7 @@
 <html>
 <head>
 	<meta charset="utf-8">
-	<link href="css/print.css" rel="stylesheet" type="text/css" media="print">
+	<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
 </head>
 <body>
 	<div id="wrapper">
diff --git a/command-line/update.py b/command-line/update.py
index bf6b08f..724d973 100644
--- a/command-line/update.py
+++ b/command-line/update.py
@@ -4,7 +4,10 @@ import re
 import json
 import jinja2
 
-def API_request(url):
+STATIC_FOLDER_PATH = '.' # without trailing slash
+WRAPPING_TEMPLATES_DIR = './templates'
+
+def API_request(url, pagename):
 	"""
 		url = API request url (string)
 		data =  { 'query': 
@@ -22,7 +25,7 @@ def API_request(url):
 	data = json.loads(response)
 
 	# Save response as JSON to be able to inspect API call
-	json_file = f'{ pagename }.json'
+	json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
 	print('Saving JSON:', json_file)
 	with open(json_file, 'w') as out:
 		out.write(json.dumps(data, indent=4))
@@ -30,14 +33,14 @@ def API_request(url):
 
 	return data
 
-def download_media(html, images):
+def download_media(html, images, wiki):
 	"""
 		html = string (HTML)
 		images = list of filenames (str)
 	"""
 	# check if 'images/' already exists
-	if not os.path.exists('images'):
-		os.makedirs('images')
+	if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
+		os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
 
 	# download media files
 	for filename in images:
@@ -45,7 +48,7 @@ def download_media(html, images):
 
 		# check if the image is already downloaded
 		# if not, then download the file
-		if not os.path.isfile(f'images/{ filename }'):
+		if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'):
 
 			# first we search for the full filename of the image
 			url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json'
@@ -63,7 +66,7 @@ def download_media(html, images):
 			image_response = urllib.request.urlopen(image_url).read()
 
 			# and we save it as a file
-			image_path = f'images/{ image_filename }'
+			image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }'
 			out = open(image_path, 'wb') 
 			out.write(image_response)
 			out.close()
@@ -72,7 +75,7 @@ def download_media(html, images):
 			time.sleep(3) # do not overload the server
 
 		# replace src link
-		image_path = f'images/{ filename }'
+		image_path = f'/{ STATIC_FOLDER_PATH }/images/{ filename }' # NOTE: for Flask the image links have to start at the root (/) of the domain; confusingly, this breaks the idea of still being able to make a working local copy of the file
 		html = re.sub(rf'src="/book/images/.*{ filename }"', f'src="{ image_path }"', html)
 
 	return html
@@ -82,28 +85,28 @@ def clean_up(html):
 		html = string (HTML)
 	"""
 	html = re.sub(r'\[.*edit.*\]', '', html) # remove the [edit]
-	html = re.sub(r'href="/book/index.php?title=.*?"', 'href="#"', html) # remove the internal links
+	html = re.sub(r'href="/book/index.php\?title=', 'href="#', html) # turn internal wiki links into in-page anchors (href="#Pagename")
 	return html
 
-def parse_page(pagename):
+def parse_page(pagename, wiki):
 	"""
 		pagename = string
 		html = string (HTML)
 	"""
 	parse = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
-	data = API_request(parse)
+	data = API_request(parse, pagename)
 	# print(json.dumps(data, indent=4))
 	if 'parse' in data:
 		html = data['parse']['text']['*']
 		images = data['parse']['images']
-		html = download_media(html, images)
+		html = download_media(html, images, wiki)
 		html = clean_up(html)
 	else: 
 		html = None
 
 	return html
 
-def save(html, pagename):
+def save(html, pagename, publication_unfolded):
 	"""
 		html = string (HTML)
 		pagename = string
@@ -111,41 +114,43 @@ def save(html, pagename):
 	if html:
 
 		# save final page that will be used with PagedJS
-		template_file = open('template.html').read()
+		template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.html').read()
 		template = jinja2.Template(template_file)
 		html = template.render(publication_unfolded=publication_unfolded, title=pagename)
 		
-		html_file = f'{ pagename }.html'
+		html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.html'
 		print('Saving HTML:', html_file)
 		with open(html_file, 'w') as out:
 			out.write(html)
 			out.close()
 
 		# save extra html page for debugging
-		template_file = open('template.debug.html').read()
+		template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.inspect.html').read()
 		template = jinja2.Template(template_file)
 		html = template.render(publication_unfolded=publication_unfolded, title=pagename)
 
-		html_file = f'{ pagename }.debug.html'
+		html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html'
 		print('Saving HTML:', html_file)
 		with open(html_file, 'w') as out:
 			out.write(html)
 			out.close()
 
-def update_material_now(pagename):
+def update_material_now(pagename, wiki):
 	"""
 		pagename = string
 		publication_unfolded = string (HTML)
 	"""
-	publication_unfolded = parse_page(pagename)
+	publication_unfolded = parse_page(pagename, wiki)
 
 	return publication_unfolded
 
 # ---
 
-wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'
-pagename = 'Unfolded'
+if __name__ == "__main__":
 
-publication_unfolded = update_material_now(pagename) # download the latest version of the page
-save(publication_unfolded, pagename) # save the page to file
+	wiki = 'https://possiblebodies.constantvzw.org/book' # without trailing slash '/'
+	pagename = 'Unfolded'
+	
+	publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page
+	save(publication_unfolded, pagename, publication_unfolded) # save the page to file