This commit is contained in:
manetta 2021-12-03 00:01:13 +01:00
parent 39b3220444
commit 3f8ccedc8c
3 changed files with 109 additions and 52 deletions

View File

@ -85,8 +85,8 @@ body{
letter-spacing: 0.01em;
/*hyphens: auto;*/
/*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
orphans: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
widows: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
orphans: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
widows: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
}
@page{
@ -433,10 +433,9 @@ div.item_index{
text-indent: -7mm;
margin-left: 7mm;
}
div.item_index li a::after{
content: ", " target-counter(attr(href), page);
div.item_index li span.item-refs a::after{
content: target-counter(attr(href), page);
font-weight: bold;
margin-left: -0.15em; /* To remove the weird gaps between inline-blocks. This seems to be a general problem in HTML5/CSS3 */
}
div.item_index li span.item_nr{
width: 10mm;
@ -614,6 +613,10 @@ p{
/* In the "ultrasonic-dreams" contribution: a <b> that is a direct child of the
   second <p> following div.title-wrapper is shifted 7mm to the left. */
div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
margin-left: -7mm;
}
/* Utility class: render the div as a block box and force its first-line indent
   to the value of the --first-indent custom property. The !important flag lets
   this declaration override other (non-important) text-indent rules. */
div.force-text-indent {
display: block;
text-indent: var(--first-indent) !important;
}
ul{
margin: 18px 0 !important;
@ -652,11 +655,24 @@ ul > li {
/* List items inside div.list-without-markers: force a -1em first-line indent
   (!important so it overrides other text-indent rules for list items). */
div.list-without-markers ul > li{
text-indent: -1em !important;
}
div.list-without-markers ul > li:before{
div.list-without-markers ul > li::before{
content: none !important;
margin-left: unset;
margin-right: unset;
}
/* Lists with hand-written markers (div.list-with-custom-markers inside a
   contribution). The three rules below work together. */

/* 1. Reset the first-line indent of each list item to zero. */
div.contribution div.list-with-custom-markers ul > li{
text-indent: 0 !important;
}
/* 2. Suppress the li::before pseudo-element (content: none) and unset its
      horizontal margins, so no generated content is placed before the item. */
div.contribution div.list-with-custom-markers ul > li::before{
content: none !important;
margin-left: unset;
margin-right: unset;
}
/* 3. The marker itself is a span.custom-marker that is a direct child of the
      item. It is laid out as an inline-block with a negative text-indent and a
      negative left margin -- presumably to pull the marker to the left of the
      item text; NOTE(review): confirm the exact offsets in the print output. */
div.contribution div.list-with-custom-markers ul > li > span.custom-marker{
display: inline-block;
text-indent: -0.85em !important;
margin-left: -0.25em;
}
/* notes (ol.references) and references (ul.references) */
ol.references,

View File

@ -6,6 +6,7 @@
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
<!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
<script>
// Thank you paged.js team for the Hyphenopoly tip!
// config for hyphenopoly
var Hyphenopoly = {
require: {
@ -39,9 +40,38 @@
<script src="./js/runHyphens.js"></script>
</head>
<body>
<div id="wrapper">
{{ publication_unfolded }}
</div>
</body>
<div id="wrapper">
{{ publication_unfolded }}
</div>
<script>
// With many thanks to Julien Taquet for digging into Paged.js
// to find a way to remove hyphenated words on page breaks!!
/**
 * Paged.js handler that avoids leaving a hyphenated word fragment at the end
 * of a page.
 *
 * After a page has been laid out, if the page fragment contains an element
 * with the class `pagedjs_hyphen`, the trailing "word" of that element (the
 * text after the last space of its innerHTML, as returned by getFinalWord) is
 * cut off and the break offset is moved back by the length of that word.
 */
class noHyphenBetweenPage extends Paged.Handler {
  constructor(chunker, polisher, caller) {
    super(chunker, polisher, caller);
  }

  /**
   * Hook called with the fragment of the page that was just laid out.
   *
   * @param {Element|DocumentFragment} pageFragment - content of the page
   * @param {Object} page - page object; `page.endToken.offset` is read and
   *     `page.breakToken` is overwritten
   * @param {Object} breakToken - break token whose `offset` is moved back
   */
  afterPageLayout(pageFragment, page, breakToken) {
    // Find the element that was split with a hyphen; nothing to do without one.
    const block = pageFragment.querySelector('.pagedjs_hyphen');
    if (!block) {
      return;
    }

    // NOTE(review): `prevHyphen` is never assigned in this class, so this
    // writes the string "undefined" into data-ref. Kept as-is because the
    // effect on Paged.js could not be verified from this file -- confirm.
    block.dataset.ref = this.prevHyphen;

    // The final word is, by construction, the suffix of the markup that
    // follows the last space.
    const html = block.innerHTML;
    const finalWord = getFinalWord(html);
    const offsetMove = finalWord.length;

    // Move the break position back by the length of the removed word.
    page.breakToken = page.endToken.offset - offsetMove;

    // Remove the last word. It is sliced off the end instead of using
    // String.replace(), which would delete the FIRST occurrence of the same
    // text if it also appears earlier in the element.
    block.innerHTML = html.slice(0, html.length - offsetMove);

    breakToken.offset = page.endToken.offset - offsetMove;
  }
}
// Register the handler class defined above with Paged.js.
Paged.registerHandlers(noHyphenBetweenPage);
/**
 * Return the last space-separated piece of a string.
 *
 * The string is split on single space characters and the final piece is
 * returned: the whole string when it contains no space, and an empty string
 * when it ends with a space.
 *
 * @param {string} words - text (or markup) to inspect
 * @returns {string} the piece after the last space
 */
function getFinalWord(words) {
  const pieces = words.split(" ");
  const lastIndex = pieces.length - 1;
  return pieces[lastIndex];
}
</script>
</body>
</html>

View File

@ -49,6 +49,12 @@ def download_media(html, images, wiki):
if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
# tmp list for filename replacements
replaced = []
images.sort()
images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later
# download media files
for filename in images:
filename = filename.replace(' ', '_') # safe filenames
@ -81,15 +87,26 @@ def download_media(html, images, wiki):
import time
time.sleep(3) # do not overload the server
# replace src link
# replace src image link (from wiki folder structure to local folder)
image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
matches = re.findall(rf'src="/images/.*?px-{ filename }"', html) # for debugging
if matches:
html = re.sub(rf'src="/images/.*?px-{ filename }"', f'src="{ image_path }"', html)
else:
matches = re.findall(rf'src="/images/.*?{ filename }"', html) # for debugging
html = re.sub(rf'src="/images/.*?{ filename }"', f'src="{ image_path }"', html)
# print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!
img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
for img_path_pattern in img_path_patterns:
matches = re.findall(img_path_pattern, html) # for debugging
# print(f'{ filename }\n')
if matches:
for match in matches:
if match not in replaced:
# print(f' { match } --> { image_path }') # for debugging: each image should have the correct match!
html = html.replace(match, image_path)
replaced.append(match)
# else:
# print(' already replaced!')
# print('\n------\n')
# break
# else:
# print(' no match!')
# print('\n------\n')
return html
@ -97,37 +114,38 @@ def add_item_inventory_links(html):
"""
html = string (HTML)
"""
# THROUGHOUT THE BOOK
# Find all references in the text to the item index
pattern = r'Item \d\d\d'
matches = re.findall(pattern, html)
matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mentioning of Item ###
index = {}
new_html = ''
from nltk.tokenize import sent_tokenize
for line in sent_tokenize(html):
for match in matches:
if match in line:
number = match.replace('Item ', '').strip()
if not number in index:
index[number] = []
count = 1
else:
count = index[number][-1] + 1
index[number].append(count)
item_id = f'ii-{ number }-{ index[number][-1] }'
line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')
# the line is pushed back to the new_html
new_html += line + ' '
# Also add a <span> around the index nr to style it
matches = re.findall(r'<li>\d\d\d', new_html)
for match in matches:
new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')
item_match = re.search(r'Item \d\d\d', match)
item = item_match.group()
number = item.replace('Item ', '').strip()
text = match.replace(f'Item { number }', '')
if not number in index:
index[number] = []
count = 1
else:
count = index[number][-1] + 1
index[number].append(count)
item_id = f'ii-{ number }-{ index[number][-1] }'
print(f'match: { number } --> { item_id } --> { text }')
html = html.replace(match, f'<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text }')
# import json
# print(json.dumps(index, indent=4))
# IN THE ITEM INDEX
# Also add a <span> around the index nr to style it
matches = re.findall(r'<li>\d\d\d', html)
for match in matches:
html = html.replace(match, f'<li><span class="item_nr">{ match }</span>')
print("\n-------------\n")
print("The following items ('###') appear [#, #, ...] many times in the book:\n")
sorted_index = dict(sorted(index.items()))
print(sorted_index)
print("\n-------------\n")
return new_html
return html
def tweaking(html):
"""
@ -157,14 +175,7 @@ def tweaking(html):
html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
# html = html.replace('trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical concerns', 'trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical concerns')
# html = html.replace('trans*generational', 'trans*generational')
# html = html.replace('trans*media', 'trans✶media')
# html = html.replace('trans*disciplinary', 'trans✶disciplinary')
# html = html.replace('trans*geopolitical', 'trans✶geopolitical')
# html = html.replace('trans*activists', 'trans✶activists')
# html = html.replace('trans*expertise', 'trans✶expertise')
# html = html.replace('trans*genealogical', 'trans✶genealogical')
html = html.replace('T*fRP', 'T✶fRP')
html = html.replace('trans*', 'trans✶')
html = html.replace('Trans*', 'trans✶')
html = html.replace('(*)', '(✶)')