|
|
@ -8,6 +8,12 @@ import jinja2 |
|
|
|
STATIC_FOLDER_PATH = './static' # without trailing slash |
|
|
|
WRAPPING_TEMPLATES_DIR = './wrapping-templates' |
|
|
|
|
|
|
|
# When `fast` is True, pages are rendered with a low-quality copy of all
# the images, read from a folder with the name "images-small".
# That folder stores a copy of all the images, generated with:
# $ mogrify -quality 5% -adaptive-resize 25% -remap pattern:gray50 *
|
|
|
fast = False |
|
|
|
|
|
|
|
def API_request(url, pagename): |
|
|
|
""" |
|
|
|
url = API request url (string) |
|
|
@ -136,22 +142,37 @@ def add_item_inventory_links(html): |
|
|
|
""" |
|
|
|
html = string (HTML) |
|
|
|
""" |
|
|
|
# Find all references in the text to the item index |
|
|
|
pattern = r'Item \d\d\d' |
|
|
|
matches = re.findall(pattern, html) |
|
|
|
index = {} |
|
|
|
new_html = '' |
|
|
|
from nltk.tokenize import sent_tokenize |
|
|
|
for line in sent_tokenize(html): |
|
|
|
for match in matches: |
|
|
|
if match in line: |
|
|
|
number = match.replace('Item ', '').strip() |
|
|
|
if not number in index: |
|
|
|
index[number] = [] |
|
|
|
count = 1 |
|
|
|
else: |
|
|
|
count = index[number][-1] + 1 |
|
|
|
index[number].append(count) |
|
|
|
item_id = f'ii-{ number }-{ index[number][-1] }' |
|
|
|
line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>') |
|
|
|
|
|
|
|
# append the processed sentence (with its item links added) to new_html
|
|
|
new_html += line + ' ' |
|
|
|
|
|
|
|
# Also wrap each index number in a <span> so that it can be styled
|
|
|
matches = re.findall(r'<li>\d\d\d', new_html) |
|
|
|
for match in matches: |
|
|
|
number = match.replace('Item ', '').strip() |
|
|
|
if not number in index: |
|
|
|
index[number] = [] |
|
|
|
count = 1 |
|
|
|
else: |
|
|
|
count = index[number][-1] + 1 |
|
|
|
index[number].append(count) |
|
|
|
item_id = f'{ number }-{ index[number][-1] }' |
|
|
|
html = html.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>') |
|
|
|
import json |
|
|
|
print(json.dumps(index, indent=4)) |
|
|
|
return html |
|
|
|
new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>') |
|
|
|
|
|
|
|
# import json |
|
|
|
# print(json.dumps(index, indent=4)) |
|
|
|
|
|
|
|
return new_html |
|
|
|
|
|
|
|
def clean_up(html): |
|
|
|
""" |
|
|
@ -163,6 +184,16 @@ def clean_up(html): |
|
|
|
html = re.sub(r'(?<=\d)]', '', html) # remove right footnote bracket ] |
|
|
|
return html |
|
|
|
|
|
|
|
def fast_loader(html):
    """
    Point image paths at their low-quality copies when FAST mode is on.

    html = string (HTML)

    When the module-level flag `fast` is truthy, every '/images/' path
    segment in the HTML is replaced with '/images-small/' and a notice is
    printed to stdout. When the flag is falsy, the HTML is returned as is.

    Returns the (possibly rewritten) HTML string.
    """
    # Guard clause: nothing to do unless FAST mode is switched on.
    if not fast:
        return html

    print('--- rendered in FAST mode ---')
    return html.replace('/images/', '/images-small/')
|
|
|
|
|
|
|
def parse_page(pagename, wiki): |
|
|
|
""" |
|
|
|
pagename = string |
|
|
@ -178,6 +209,7 @@ def parse_page(pagename, wiki): |
|
|
|
html = clean_up(html) |
|
|
|
html = add_item_inventory_links(html) |
|
|
|
# html = insert_variable_geometry(html) |
|
|
|
html = fast_loader(html) |
|
|
|
else: |
|
|
|
html = None |
|
|
|
|
|
|
|