From 2248dce8aa601f3c8e08b42f53ff5639028c7b6e Mon Sep 17 00:00:00 2001 From: manetta Date: Wed, 1 Sep 2021 13:09:09 +0200 Subject: [PATCH] syncing the update script between the web-interface and command line version --- command-line/template.html | 15 ------ command-line/templates/template.html | 15 ++++++ .../template.inspect.html} | 2 +- command-line/update.py | 51 ++++++++++--------- 4 files changed, 44 insertions(+), 39 deletions(-) delete mode 100644 command-line/template.html create mode 100644 command-line/templates/template.html rename command-line/{template.debug.html => templates/template.inspect.html} (61%) diff --git a/command-line/template.html b/command-line/template.html deleted file mode 100644 index bee3d50..0000000 --- a/command-line/template.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - -
- {{ publication_unfolded }} -
- - \ No newline at end of file diff --git a/command-line/templates/template.html b/command-line/templates/template.html new file mode 100644 index 0000000..ca73739 --- /dev/null +++ b/command-line/templates/template.html @@ -0,0 +1,15 @@ + + + + + + + + + + +
+ {{ publication_unfolded }} +
+ + \ No newline at end of file diff --git a/command-line/template.debug.html b/command-line/templates/template.inspect.html similarity index 61% rename from command-line/template.debug.html rename to command-line/templates/template.inspect.html index 59ce4bc..ddb1730 100644 --- a/command-line/template.debug.html +++ b/command-line/templates/template.inspect.html @@ -2,7 +2,7 @@ - +
diff --git a/command-line/update.py b/command-line/update.py index bf6b08f..724d973 100644 --- a/command-line/update.py +++ b/command-line/update.py @@ -4,7 +4,10 @@ import re import json import jinja2 -def API_request(url): +STATIC_FOLDER_PATH = '.' # without trailing slash +WRAPPING_TEMPLATES_DIR = './templates' + +def API_request(url, pagename): """ url = API request url (string) data = { 'query': @@ -22,7 +25,7 @@ def API_request(url): data = json.loads(response) # Save response as JSON to be able to inspect API call - json_file = f'{ pagename }.json' + json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json' print('Saving JSON:', json_file) with open(json_file, 'w') as out: out.write(json.dumps(data, indent=4)) @@ -30,14 +33,14 @@ def API_request(url): return data -def download_media(html, images): +def download_media(html, images, wiki): """ html = string (HTML) images = list of filenames (str) """ # check if 'images/' already exists - if not os.path.exists('images'): - os.makedirs('images') + if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'): + os.makedirs(f'{ STATIC_FOLDER_PATH }/images') # download media files for filename in images: @@ -45,7 +48,7 @@ def download_media(html, images): # check if the image is already downloaded # if not, then download the file - if not os.path.isfile(f'images/{ filename }'): + if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'): # first we search for the full filename of the image url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json' @@ -63,7 +66,7 @@ def download_media(html, images): image_response = urllib.request.urlopen(image_url).read() # and we save it as a file - image_path = f'images/{ image_filename }' + image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }' out = open(image_path, 'wb') out.write(image_response) out.close() @@ -72,7 +75,7 @@ def download_media(html, images): time.sleep(3) # do not overload the server # replace src link - image_path = f'images/{ filename }' + image_path = f'/{ STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file html = re.sub(rf'src="/book/images/.*{ filename }"', f'src="{ image_path }"', html) return html @@ -82,28 +85,28 @@ def clean_up(html): html = string (HTML) """ html = re.sub(r'\[.*edit.*\]', '', html) # remove the [edit] - html = re.sub(r'href="/book/index.php?title=.*?"', 'href="#"', html) # remove the internal links + html = re.sub(r'href="/book/index.php\?title=', 'href="#', html) # remove the internal wiki links return html -def parse_page(pagename): +def parse_page(pagename, wiki): """ pagename = string html = string (HTML) """ parse = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json' - data = API_request(parse) + data = API_request(parse, pagename) # print(json.dumps(data, indent=4)) if 'parse' in data: html = data['parse']['text']['*'] images = data['parse']['images'] - html = download_media(html, images) + html = download_media(html, images, wiki) html = clean_up(html) else: html = None return html -def save(html, pagename): +def save(html, pagename, publication_unfolded): """ html = string (HTML) pagename = string @@ -111,41 +114,43 @@ def save(html, pagename): if html: # save final page that will be used with PagedJS - template_file = open('template.html').read() + template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.html').read() template = jinja2.Template(template_file) html = template.render(publication_unfolded=publication_unfolded, title=pagename) - html_file = f'{ pagename }.html' + html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.html' print('Saving HTML:', html_file) with open(html_file, 'w') as out: out.write(html) out.close() # save extra html page for debugging - template_file = open('template.debug.html').read() + template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.inspect.html').read() template = jinja2.Template(template_file) html = template.render(publication_unfolded=publication_unfolded, title=pagename) - html_file = f'{ pagename }.debug.html' + html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html' print('Saving HTML:', html_file) with open(html_file, 'w') as out: out.write(html) out.close() -def update_material_now(pagename): +def update_material_now(pagename, wiki): """ pagename = string publication_unfolded = string (HTML) """ - publication_unfolded = parse_page(pagename) + publication_unfolded = parse_page(pagename, wiki) return publication_unfolded # --- -wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/' -pagename = 'Unfolded' +if __name__ == "__main__": -publication_unfolded = update_material_now(pagename) # download the latest version of the page -save(publication_unfolded, pagename) # save the page to file + wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/' + pagename = 'Unfolded' + + publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page + save(publication_unfolded, pagename, publication_unfolded) # save the page to file