sync

2021-12-03 00:01:13 +01:00 · 2021-12-03 00:01:13 +01:00 · 3f8ccedc8c
commit 3f8ccedc8c
parent 39b3220444
3 changed files with 109 additions and 52 deletions
--- a/command-line/css/print.css
+++ b/command-line/css/print.css
@ -85,8 +85,8 @@ body{
    letter-spacing: 0.01em;
    /*hyphens: auto;*/
    /*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
-    orphans: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
-    widows: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
+    orphans: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
+    widows: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
 }

@page{
@ -433,10 +433,9 @@ div.item_index{
        text-indent: -7mm;
        margin-left: 7mm;
    }
-    div.item_index li a::after{
-        content: ", " target-counter(attr(href), page);
+    div.item_index li span.item-refs a::after{
+        content: target-counter(attr(href), page);
        font-weight: bold;
-        margin-left: -0.15em; /* To remove the weird gaps between inline-blocks. This seems to be a general problem in HTML5/CSS3 */
    }
    div.item_index li span.item_nr{
        width: 10mm;
@ -614,6 +613,10 @@ p{
    div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
        margin-left: -7mm;
    }
+    div.force-text-indent {
+        display: block;
+        text-indent: var(--first-indent) !important;
+    }

 ul{
    margin: 18px 0 !important;
@ -652,11 +655,24 @@ ul > li {
        div.list-without-markers ul > li{
            text-indent: -1em !important;
        }
-        div.list-without-markers ul > li:before{
+        div.list-without-markers ul > li::before{
            content: none !important;
            margin-left: unset;
            margin-right: unset;
        }
+        div.contribution div.list-with-custom-markers ul > li{
+            text-indent: 0 !important;
+        }
+        div.contribution div.list-with-custom-markers ul > li::before{
+            content: none !important;
+            margin-left: unset;
+            margin-right: unset;
+        }
+        div.contribution div.list-with-custom-markers ul > li > span.custom-marker{
+            display: inline-block;
+            text-indent: -0.85em !important;
+            margin-left: -0.25em;
+        }

    /* notes (ol.references) and references (ul.references) */
    ol.references,
--- a/command-line/templates/template.html
+++ b/command-line/templates/template.html
@ -6,6 +6,7 @@
 	<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
 	<!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
 	<script>
+        // Thank you paged.js team for the Hyphenopoly tip!
        // config for hyphenopoly
        var Hyphenopoly = {
            require: {
@ -39,9 +40,38 @@
    <script src="./js/runHyphens.js"></script>
 </head>
 <body>
-	<div id="wrapper">
-		{{ publication_unfolded }}
-	</div>
-</body>
+    <div id="wrapper">
+    	{{ publication_unfolded }}
+    </div>
+    <script>
+    // With many thanks to Julien Taquet for digging into Paged.js
+    // to find a way to remove hyphenated words on page breaks!!
+    /**
+     * Paged.js handler that prevents a hyphenated word from straddling a page
+     * break. In its `afterPageLayout` hook it looks for an element carrying the
+     * `.pagedjs_hyphen` class inside the laid-out page fragment, strips the
+     * trailing word fragment from that element and moves the break offset back
+     * by the length of the removed fragment.
+     */
+    class noHyphenBetweenPage extends Paged.Handler {
+        constructor(chunker, polisher, caller) {
+            super(chunker, polisher, caller);
+        }
+        /**
+         * @param pageFragment fragment that is searched for `.pagedjs_hyphen`
+         * @param page         object whose `endToken.offset` is read and whose
+         *                     `breakToken` is overwritten
+         * @param breakToken   object whose `offset` is moved back
+         */
+        afterPageLayout(pageFragment, page, breakToken) {
+            // find the hyphenated word (query once, reuse the result)
+            const block = pageFragment.querySelector('.pagedjs_hyphen');
+            if (!block) {
+                return;
+            }
+            // NOTE(review): `this.prevHyphen` is not assigned anywhere in this
+            // class, so this presumably stores "undefined" -- confirm it is needed.
+            block.dataset.ref = this.prevHyphen;
+            const html = block.innerHTML;
+            // length of the trailing word that has to move to the next page
+            const offsetMove = getFinalWord(html).length;
+            // move the token accordingly
+            page.breakToken = page.endToken.offset - offsetMove;
+            // remove the last word by cutting it off the END of the markup;
+            // String.replace() with a string pattern removes the FIRST occurrence,
+            // which deletes the wrong text when the word also appears earlier.
+            block.innerHTML = html.slice(0, html.length - offsetMove);
+            breakToken.offset = page.endToken.offset - offsetMove;
+        }
+    }
+    Paged.registerHandlers(noHyphenBetweenPage);

+    // Return whatever follows the last space in `words`: the whole string when
+    // it contains no space, an empty string when it ends with a space.
+    function getFinalWord(words) {
+        const pieces = words.split(" ");
+        return pieces.pop();
+    }
+    </script>
+</body>
 </html>
--- a/command-line/update.py
+++ b/command-line/update.py
@ -49,6 +49,12 @@ def download_media(html, images, wiki):
 	if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
 		os.makedirs(f'{ STATIC_FOLDER_PATH }/images')

+	# tmp list for filename replacements 
+	replaced = []
+	
+	images.sort()
+	images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later
+
 	# download media files
 	for filename in images:
 		filename = filename.replace(' ', '_') # safe filenames
@ -81,15 +87,26 @@ def download_media(html, images, wiki):
 			import time
 			time.sleep(3) # do not overload the server

-		# replace src link
+		# replace src image link (from wiki folder structure to local folder)
 		image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
-		matches = re.findall(rf'src="/images/.*?px-{ filename }"', html) # for debugging
-		if matches:
-			html = re.sub(rf'src="/images/.*?px-{ filename }"', f'src="{ image_path }"', html)
-		else:
-			matches = re.findall(rf'src="/images/.*?{ filename }"', html) # for debugging
-			html = re.sub(rf'src="/images/.*?{ filename }"', f'src="{ image_path }"', html) 
-		# print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!
+		
+		img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
+		for img_path_pattern in img_path_patterns:
+			matches = re.findall(img_path_pattern, html) # for debugging
+			# print(f'{ filename }\n')
+			if matches:
+				for match in matches:
+					if match not in replaced:
+						# print(f'    { match } --> { image_path }') # for debugging: each image should have the correct match!
+						html = html.replace(match, image_path)
+						replaced.append(match)
+					# else:
+						# print('    already replaced!')
+				# print('\n------\n')
+				# break		
+			# else:
+				# print('    no match!')
+			# print('\n------\n')

 	return html

@ -97,37 +114,38 @@ def add_item_inventory_links(html):
 	"""
 		html = string (HTML)
 	"""
+	# THROUGHOUT THE BOOK
 	# Find all references in the text to the item index
-	pattern = r'Item \d\d\d'
-	matches = re.findall(pattern, html)
+	matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mentioning of Item ###
 	index = {}
-	new_html = ''
-	from nltk.tokenize import sent_tokenize
-	for line in sent_tokenize(html):
-		for match in matches:
-			if match in line:
-				number = match.replace('Item ', '').strip()
-				if not number in index:
-					index[number] = []
-					count = 1
-				else:
-					count = index[number][-1] + 1
-				index[number].append(count)
-				item_id = f'ii-{ number }-{ index[number][-1] }'
-				line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')
-
-		# the line is pushed back to the new_html
-		new_html += line + ' '
-		
-	# Also add a <span> around the index nr to style it
-	matches = re.findall(r'<li>\d\d\d', new_html)
 	for match in matches:
-		new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')
+		item_match = re.search(r'Item \d\d\d', match)
+		item = item_match.group()
+		number = item.replace('Item ', '').strip()
+		text = match.replace(f'Item { number }', '')
+		if not number in index:
+			index[number] = []
+			count = 1
+		else:
+			count = index[number][-1] + 1
+		index[number].append(count)
+		item_id = f'ii-{ number }-{ index[number][-1] }'
+		print(f'match: { number } --> { item_id } --> { text }')
+		html = html.replace(match, f'<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text }')

-	# import json
-	# print(json.dumps(index, indent=4))
+	# IN THE ITEM INDEX
+	# Also add a <span> around the index nr to style it
+	matches = re.findall(r'<li>\d\d\d', html)
+	for match in matches:
+		html = html.replace(match, f'<li><span class="item_nr">{ match }</span>')
+
+	print("\n-------------\n")
+	print("The following items ('###') appear [#, #, ...] many times in the book:\n")
+	sorted_index = dict(sorted(index.items()))
+	print(sorted_index)
+	print("\n-------------\n")
 	
-	return new_html
+	return html

 def tweaking(html):
 	"""
@ -157,14 +175,7 @@ def tweaking(html):
 	html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>') 
 	html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>') 
 	html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>') 
-	# html = html.replace('trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical concerns', 'trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical concerns') 
-	# html = html.replace('trans*generational', 'trans*generational') 
-	# html = html.replace('trans*media', 'trans✶media') 
-	# html = html.replace('trans*disciplinary', 'trans✶disciplinary') 
-	# html = html.replace('trans*geopolitical', 'trans✶geopolitical') 
-	# html = html.replace('trans*activists', 'trans✶activists') 
-	# html = html.replace('trans*expertise', 'trans✶expertise') 
-	# html = html.replace('trans*genealogical', 'trans✶genealogical') 
+	html = html.replace('T*fRP', 'T✶fRP') 
 	html = html.replace('trans*', 'trans✶') 
 	html = html.replace('Trans*', 'trans✶') 
 	html = html.replace('(*)', '(✶)')