sync
parent 39b3220444
commit 3f8ccedc8c

@@ -85,8 +85,8 @@ body{
     letter-spacing: 0.01em;
     /*hyphens: auto;*/
     /*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
-    orphans: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
-    widows: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
+    orphans: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
+    widows: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
 }
 
 @page{
@@ -433,10 +433,9 @@ div.item_index{
     text-indent: -7mm;
     margin-left: 7mm;
 }
-div.item_index li a::after{
-    content: ", " target-counter(attr(href), page);
+div.item_index li span.item-refs a::after{
+    content: target-counter(attr(href), page);
     font-weight: bold;
-    margin-left: -0.15em; /* To remove the weird gaps between inline-blocks. This seems to be a general problem in HTML5/CSS3 */
 }
 div.item_index li span.item_nr{
     width: 10mm;
@@ -614,6 +613,10 @@ p{
 div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
     margin-left: -7mm;
 }
+div.force-text-indent {
+    display: block;
+    text-indent: var(--first-indent) !important;
+}
 
 ul{
     margin: 18px 0 !important;
@@ -652,11 +655,24 @@ ul > li {
 div.list-without-markers ul > li{
     text-indent: -1em !important;
 }
-div.list-without-markers ul > li:before{
+div.list-without-markers ul > li::before{
     content: none !important;
     margin-left: unset;
     margin-right: unset;
 }
+div.contribution div.list-with-custom-markers ul > li{
+    text-indent: 0 !important;
+}
+div.contribution div.list-with-custom-markers ul > li::before{
+    content: none !important;
+    margin-left: unset;
+    margin-right: unset;
+}
+div.contribution div.list-with-custom-markers ul > li > span.custom-marker{
+    display: inline-block;
+    text-indent: -0.85em !important;
+    margin-left: -0.25em;
+}
 
 /* notes (ol.references) and references (ul.references) */
 ol.references,

@@ -6,6 +6,7 @@
 <link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
 <!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
 <script>
+    // Thank you paged.js team for the Hyphenopoly tip!
     // config for hyphenopoly
     var Hyphenopoly = {
         require: {
@@ -39,9 +40,38 @@
 <script src="./js/runHyphens.js"></script>
 </head>
 <body>
 <div id="wrapper">
 {{ publication_unfolded }}
 </div>
-</body>
+<script>
+    // With many thanks to Julien Taquet for digging into Paged.js
+    // to find a way to remove hyphenated words on page breaks!!
+    class noHyphenBetweenPage extends Paged.Handler {
+        constructor(chunker, polisher, caller) {
+            super(chunker, polisher, caller);
+            this.hyphenToken;
+        }
+        afterPageLayout(pageFragment, page, breakToken) {
+            if (pageFragment.querySelector('.pagedjs_hyphen')) {
+                // find the hyphenated word
+                let block = pageFragment.querySelector('.pagedjs_hyphen');
+                block.dataset.ref = this.prevHyphen;
+                // move the breakToken
+                let offsetMove = getFinalWord(block.innerHTML).length;
+                // move the token accordingly
+                page.breakToken = page.endToken.offset - offsetMove;
+                // remove the last word
+                block.innerHTML = block.innerHTML.replace(getFinalWord(block.innerHTML), "");
+                breakToken.offset = page.endToken.offset - offsetMove;
+            }
+        }
+    }
+    Paged.registerHandlers(noHyphenBetweenPage);
+
+    function getFinalWord(words) {
+        var n = words.split(" ");
+        return n[n.length - 1];
+    }
+</script>
+</body>
 </html>

@@ -49,6 +49,12 @@ def download_media(html, images, wiki):
     if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
         os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
 
+    # tmp list for filename replacements
+    replaced = []
+
+    images.sort()
+    images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later
+
     # download media files
     for filename in images:
         filename = filename.replace(' ', '_') # safe filenames
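A note on the ordering above: the non-greedy fallback pattern used further down (/images/.*?<filename>) can also match a path that ends in a longer filename, which is why the list is reverse-sorted (so Image01.png is handled before 01.png) and every replaced match is remembered in the replaced list. A minimal sketch of that overlap, with made-up paths:

    import re

    # Made-up paths, for illustration only: the fallback pattern for the short
    # name '01.png' also matches the path of the longer 'Image01.png'.
    html = '<img src="/images/a/a1/Image01.png"> <img src="/images/0/01/01.png">'

    print(re.findall(r'(?<!\.)/images/.*?01.png', html))
    # ['/images/a/a1/Image01.png', '/images/0/01/01.png']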
@@ -81,15 +87,26 @@ def download_media(html, images, wiki):
         import time
         time.sleep(3) # do not overload the server
 
-        # replace src link
+        # replace src image link (from wiki folder structure to local folder)
         image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
-        matches = re.findall(rf'src="/images/.*?px-{ filename }"', html) # for debugging
-        if matches:
-            html = re.sub(rf'src="/images/.*?px-{ filename }"', f'src="{ image_path }"', html)
-        else:
-            matches = re.findall(rf'src="/images/.*?{ filename }"', html) # for debugging
-            html = re.sub(rf'src="/images/.*?{ filename }"', f'src="{ image_path }"', html)
-        # print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!
+        img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
+        for img_path_pattern in img_path_patterns:
+            matches = re.findall(img_path_pattern, html) # for debugging
+            # print(f'{ filename }\n')
+            if matches:
+                for match in matches:
+                    if match not in replaced:
+                        # print(f' { match } --> { image_path }') # for debugging: each image should have the correct match!
+                        html = html.replace(match, image_path)
+                        replaced.append(match)
+                    # else:
+                        # print(' already replaced!')
+                        # print('\n------\n')
+                    # break
+            # else:
+                # print(' no match!')
+                # print('\n------\n')
 
     return html
 
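Two details of the new matching, shown in a small self-contained sketch (the filename and paths here are invented): the patterns are tried thumbnail form first ("...px-<filename>") and plain form second, and the (?<!\.) lookbehind skips any occurrence of /images/ that is preceded by a dot, presumably so that a src already rewritten to a dot-prefixed local path (e.g. ./images/...) is not matched a second time.

    import re

    filename = 'Example_photo.jpg'  # hypothetical filename, for illustration only

    # the two patterns from the hunk above: thumbnail ("px-") form, then plain form
    img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']

    html = ('<img src="/images/thumb/7/7a/600px-Example_photo.jpg">'  # wiki thumbnail path
            '<img src="/images/7/7a/Example_photo.jpg">'              # wiki original path
            '<img src="./images/Example_photo.jpg">')                 # dot-prefixed: skipped by (?<!\.)

    for img_path_pattern in img_path_patterns:
        print(re.findall(img_path_pattern, html))

    # ['/images/thumb/7/7a/600px-Example_photo.jpg']
    # ['/images/thumb/7/7a/600px-Example_photo.jpg', '/images/7/7a/Example_photo.jpg']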
@@ -97,37 +114,38 @@ def add_item_inventory_links(html):
     """
     html = string (HTML)
     """
+    # THROUGHOUT THE BOOK
     # Find all references in the text to the item index
-    pattern = r'Item \d\d\d'
-    matches = re.findall(pattern, html)
+    matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mentioning of Item ###
     index = {}
-    new_html = ''
-    from nltk.tokenize import sent_tokenize
-    for line in sent_tokenize(html):
-        for match in matches:
-            if match in line:
-                number = match.replace('Item ', '').strip()
-                if not number in index:
-                    index[number] = []
-                    count = 1
-                else:
-                    count = index[number][-1] + 1
-                index[number].append(count)
-                item_id = f'ii-{ number }-{ index[number][-1] }'
-                line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')
-
-        # the line is pushed back to the new_html
-        new_html += line + ' '
-
-    # Also add a <span> around the index nr to style it
-    matches = re.findall(r'<li>\d\d\d', new_html)
     for match in matches:
-        new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')
+        item_match = re.search(r'Item \d\d\d', match)
+        item = item_match.group()
+        number = item.replace('Item ', '').strip()
+        text = match.replace(f'Item { number }', '')
+        if not number in index:
+            index[number] = []
+            count = 1
+        else:
+            count = index[number][-1] + 1
+        index[number].append(count)
+        item_id = f'ii-{ number }-{ index[number][-1] }'
+        print(f'match: { number } --> { item_id } --> { text }')
+        html = html.replace(match, f'<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text }')
 
-    # import json
-    # print(json.dumps(index, indent=4))
+    # IN THE ITEM INDEX
+    # Also add a <span> around the index nr to style it
+    matches = re.findall(r'<li>\d\d\d', html)
+    for match in matches:
+        html = html.replace(match, f'<li><span class="item_nr">{ match }</span>')
+
+    print("\n-------------\n")
+    print("The following items ('###') appear [#, #, ...] many times in the book:\n")
+    sorted_index = dict(sorted(index.items()))
+    print(sorted_index)
+    print("\n-------------\n")
 
-    return new_html
+    return html
 
 def tweaking(html):
     """
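For reference, the id scheme this rewrite produces: the nth mention of "Item ###" in the running text gets its own anchor id of the form ii-###-n, so every individual mention can be linked to (for instance from the item index). A stripped-down sketch of just the counting; the context-grabbing regex and the html.replace() step are left out, and the sample sentence is invented:

    import re

    html = 'See Item 042 for the wires. Item 042 reappears later, next to Item 007.'

    index = {}
    for match in re.findall(r'Item \d\d\d', html):
        number = match.replace('Item ', '').strip()
        if number not in index:
            index[number] = []
            count = 1
        else:
            count = index[number][-1] + 1
        index[number].append(count)
        print(f'ii-{ number }-{ index[number][-1] }')

    # ii-042-1
    # ii-042-2
    # ii-007-1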
@@ -157,14 +175,7 @@ def tweaking(html):
     html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
     html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
     html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
-    # html = html.replace('trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical concerns', 'trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical concerns')
-    # html = html.replace('trans*generational', 'trans*generational')
-    # html = html.replace('trans*media', 'trans✶media')
-    # html = html.replace('trans*disciplinary', 'trans✶disciplinary')
-    # html = html.replace('trans*geopolitical', 'trans✶geopolitical')
-    # html = html.replace('trans*activists', 'trans✶activists')
-    # html = html.replace('trans*expertise', 'trans✶expertise')
-    # html = html.replace('trans*genealogical', 'trans✶genealogical')
+    html = html.replace('T*fRP', 'T✶fRP')
     html = html.replace('trans*', 'trans✶')
     html = html.replace('Trans*', 'trans✶')
     html = html.replace('(*)', '(✶)')