Syncing the update script between the web-interface and command-line versions
This commit is contained in:
parent
db1d5573a9
commit
2248dce8aa
@ -1,15 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<script src="js/paged.js" type="text/javascript"></script>
|
||||
<script src="js/paged.polyfill.js" type="text/javascript"></script>
|
||||
<link href="css/interface.css" rel="stylesheet" type="text/css">
|
||||
<link href="css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||
</head>
|
||||
<body>
|
||||
<div id="wrapper">
|
||||
{{ publication_unfolded }}
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
15
command-line/templates/template.html
Normal file
15
command-line/templates/template.html
Normal file
@ -0,0 +1,15 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<script src="./js/paged.js" type="text/javascript"></script>
|
||||
<script src="./js/paged.polyfill.js" type="text/javascript"></script>
|
||||
<link href="./css/interface.css" rel="stylesheet" type="text/css">
|
||||
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||
</head>
|
||||
<body>
|
||||
<div id="wrapper">
|
||||
{{ publication_unfolded }}
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@ -2,7 +2,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<link href="css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||
</head>
|
||||
<body>
|
||||
<div id="wrapper">
|
@ -4,7 +4,10 @@ import re
|
||||
import json
|
||||
import jinja2
|
||||
|
||||
def API_request(url):
|
||||
STATIC_FOLDER_PATH = '.' # without trailing slash
|
||||
WRAPPING_TEMPLATES_DIR = './templates'
|
||||
|
||||
def API_request(url, pagename):
|
||||
"""
|
||||
url = API request url (string)
|
||||
data = { 'query':
|
||||
@ -22,7 +25,7 @@ def API_request(url):
|
||||
data = json.loads(response)
|
||||
|
||||
# Save response as JSON to be able to inspect API call
|
||||
json_file = f'{ pagename }.json'
|
||||
json_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.json'
|
||||
print('Saving JSON:', json_file)
|
||||
with open(json_file, 'w') as out:
|
||||
out.write(json.dumps(data, indent=4))
|
||||
@ -30,14 +33,14 @@ def API_request(url):
|
||||
|
||||
return data
|
||||
|
||||
def download_media(html, images):
|
||||
def download_media(html, images, wiki):
|
||||
"""
|
||||
html = string (HTML)
|
||||
images = list of filenames (str)
|
||||
"""
|
||||
# check if 'images/' already exists
|
||||
if not os.path.exists('images'):
|
||||
os.makedirs('images')
|
||||
if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
|
||||
os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
|
||||
|
||||
# download media files
|
||||
for filename in images:
|
||||
@ -45,7 +48,7 @@ def download_media(html, images):
|
||||
|
||||
# check if the image is already downloaded
|
||||
# if not, then download the file
|
||||
if not os.path.isfile(f'images/{ filename }'):
|
||||
if not os.path.isfile(f'{ STATIC_FOLDER_PATH }/images/{ filename }'):
|
||||
|
||||
# first we search for the full filename of the image
|
||||
url = f'{ wiki }/api.php?action=query&list=allimages&aifrom={ filename }&format=json'
|
||||
@ -63,7 +66,7 @@ def download_media(html, images):
|
||||
image_response = urllib.request.urlopen(image_url).read()
|
||||
|
||||
# and we save it as a file
|
||||
image_path = f'images/{ image_filename }'
|
||||
image_path = f'{ STATIC_FOLDER_PATH }/images/{ image_filename }'
|
||||
out = open(image_path, 'wb')
|
||||
out.write(image_response)
|
||||
out.close()
|
||||
@ -72,7 +75,7 @@ def download_media(html, images):
|
||||
time.sleep(3) # do not overload the server
|
||||
|
||||
# replace src link
|
||||
image_path = f'images/{ filename }'
|
||||
image_path = f'/{ STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
|
||||
html = re.sub(rf'src="/book/images/.*{ filename }"', f'src="{ image_path }"', html)
|
||||
|
||||
return html
|
||||
@ -82,28 +85,28 @@ def clean_up(html):
|
||||
html = string (HTML)
|
||||
"""
|
||||
html = re.sub(r'\[.*edit.*\]', '', html) # remove the [edit]
|
||||
html = re.sub(r'href="/book/index.php?title=.*?"', 'href="#"', html) # remove the internal links
|
||||
html = re.sub(r'href="/book/index.php\?title=', 'href="#', html) # remove the internal wiki links
|
||||
return html
|
||||
|
||||
def parse_page(pagename):
|
||||
def parse_page(pagename, wiki):
|
||||
"""
|
||||
pagename = string
|
||||
html = string (HTML)
|
||||
"""
|
||||
parse = f'{ wiki }/api.php?action=parse&page={ pagename }&pst=True&format=json'
|
||||
data = API_request(parse)
|
||||
data = API_request(parse, pagename)
|
||||
# print(json.dumps(data, indent=4))
|
||||
if 'parse' in data:
|
||||
html = data['parse']['text']['*']
|
||||
images = data['parse']['images']
|
||||
html = download_media(html, images)
|
||||
html = download_media(html, images, wiki)
|
||||
html = clean_up(html)
|
||||
else:
|
||||
html = None
|
||||
|
||||
return html
|
||||
|
||||
def save(html, pagename):
|
||||
def save(html, pagename, publication_unfolded):
|
||||
"""
|
||||
html = string (HTML)
|
||||
pagename = string
|
||||
@ -111,41 +114,43 @@ def save(html, pagename):
|
||||
if html:
|
||||
|
||||
# save final page that will be used with PagedJS
|
||||
template_file = open('template.html').read()
|
||||
template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.html').read()
|
||||
template = jinja2.Template(template_file)
|
||||
html = template.render(publication_unfolded=publication_unfolded, title=pagename)
|
||||
|
||||
html_file = f'{ pagename }.html'
|
||||
html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.html'
|
||||
print('Saving HTML:', html_file)
|
||||
with open(html_file, 'w') as out:
|
||||
out.write(html)
|
||||
out.close()
|
||||
|
||||
# save extra html page for debugging
|
||||
template_file = open('template.debug.html').read()
|
||||
template_file = open(f'{ STATIC_FOLDER_PATH }/{ WRAPPING_TEMPLATES_DIR }/template.inspect.html').read()
|
||||
template = jinja2.Template(template_file)
|
||||
html = template.render(publication_unfolded=publication_unfolded, title=pagename)
|
||||
|
||||
html_file = f'{ pagename }.debug.html'
|
||||
html_file = f'{ STATIC_FOLDER_PATH }/{ pagename }.inspect.html'
|
||||
print('Saving HTML:', html_file)
|
||||
with open(html_file, 'w') as out:
|
||||
out.write(html)
|
||||
out.close()
|
||||
|
||||
def update_material_now(pagename):
|
||||
def update_material_now(pagename, wiki):
|
||||
"""
|
||||
pagename = string
|
||||
publication_unfolded = string (HTML)
|
||||
"""
|
||||
publication_unfolded = parse_page(pagename)
|
||||
publication_unfolded = parse_page(pagename, wiki)
|
||||
|
||||
return publication_unfolded
|
||||
|
||||
# ---
|
||||
|
||||
wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'
|
||||
pagename = 'Unfolded'
|
||||
if __name__ == "__main__":
|
||||
|
||||
publication_unfolded = update_material_now(pagename) # download the latest version of the page
|
||||
save(publication_unfolded, pagename) # save the page to file
|
||||
wiki = 'https://possiblebodies.constantvzw.org/book' # remove tail slash '/'
|
||||
pagename = 'Unfolded'
|
||||
|
||||
publication_unfolded = update_material_now(pagename, wiki) # download the latest version of the page
|
||||
save(publication_unfolded, pagename, publication_unfolded) # save the page to file
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user