sync
This commit is contained in:
parent
39b3220444
commit
3f8ccedc8c
@ -85,8 +85,8 @@ body{
|
|||||||
letter-spacing: 0.01em;
|
letter-spacing: 0.01em;
|
||||||
/*hyphens: auto;*/
|
/*hyphens: auto;*/
|
||||||
/*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
|
/*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
|
||||||
orphans: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
orphans: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||||
widows: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
widows: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||||
}
|
}
|
||||||
|
|
||||||
@page{
|
@page{
|
||||||
@ -433,10 +433,9 @@ div.item_index{
|
|||||||
text-indent: -7mm;
|
text-indent: -7mm;
|
||||||
margin-left: 7mm;
|
margin-left: 7mm;
|
||||||
}
|
}
|
||||||
div.item_index li a::after{
|
div.item_index li span.item-refs a::after{
|
||||||
content: ", " target-counter(attr(href), page);
|
content: target-counter(attr(href), page);
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
margin-left: -0.15em; /* To remove the weird gaps between inline-blocks. This seems to be a general problem in HTML5/CSS3 */
|
|
||||||
}
|
}
|
||||||
div.item_index li span.item_nr{
|
div.item_index li span.item_nr{
|
||||||
width: 10mm;
|
width: 10mm;
|
||||||
@ -614,6 +613,10 @@ p{
|
|||||||
div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
|
div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
|
||||||
margin-left: -7mm;
|
margin-left: -7mm;
|
||||||
}
|
}
|
||||||
|
div.force-text-indent {
|
||||||
|
display: block;
|
||||||
|
text-indent: var(--first-indent) !important;
|
||||||
|
}
|
||||||
|
|
||||||
ul{
|
ul{
|
||||||
margin: 18px 0 !important;
|
margin: 18px 0 !important;
|
||||||
@ -652,11 +655,24 @@ ul > li {
|
|||||||
div.list-without-markers ul > li{
|
div.list-without-markers ul > li{
|
||||||
text-indent: -1em !important;
|
text-indent: -1em !important;
|
||||||
}
|
}
|
||||||
div.list-without-markers ul > li:before{
|
div.list-without-markers ul > li::before{
|
||||||
content: none !important;
|
content: none !important;
|
||||||
margin-left: unset;
|
margin-left: unset;
|
||||||
margin-right: unset;
|
margin-right: unset;
|
||||||
}
|
}
|
||||||
|
div.contribution div.list-with-custom-markers ul > li{
|
||||||
|
text-indent: 0 !important;
|
||||||
|
}
|
||||||
|
div.contribution div.list-with-custom-markers ul > li::before{
|
||||||
|
content: none !important;
|
||||||
|
margin-left: unset;
|
||||||
|
margin-right: unset;
|
||||||
|
}
|
||||||
|
div.contribution div.list-with-custom-markers ul > li > span.custom-marker{
|
||||||
|
display: inline-block;
|
||||||
|
text-indent: -0.85em !important;
|
||||||
|
margin-left: -0.25em;
|
||||||
|
}
|
||||||
|
|
||||||
/* notes (ol.references) and references (ul.references) */
|
/* notes (ol.references) and references (ul.references) */
|
||||||
ol.references,
|
ol.references,
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
|
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||||
<!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
|
<!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
|
||||||
<script>
|
<script>
|
||||||
|
// Thank you paged.js team for the Hyphenopoly tip!
|
||||||
// config for hyphenopoly
|
// config for hyphenopoly
|
||||||
var Hyphenopoly = {
|
var Hyphenopoly = {
|
||||||
require: {
|
require: {
|
||||||
@ -39,9 +40,38 @@
|
|||||||
<script src="./js/runHyphens.js"></script>
|
<script src="./js/runHyphens.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="wrapper">
|
<div id="wrapper">
|
||||||
{{ publication_unfolded }}
|
{{ publication_unfolded }}
|
||||||
</div>
|
</div>
|
||||||
</body>
|
<script>
|
||||||
|
// With many thanks to Julien Taquet for digging into Paged.js
|
||||||
|
// to find a way to remove hyphenated words on page breaks!!
|
||||||
|
/**
 * Paged.js handler that keeps a hyphenated word from being split across a
 * page break.
 *
 * After a page has been laid out, it looks for an element carrying the
 * `.pagedjs_hyphen` class in the page fragment. When one is found, the final
 * word of that element is removed from the current page and the break token
 * offset is moved back by the length of that word, so the word is laid out
 * again at the start of the next page.
 */
class noHyphenBetweenPage extends Paged.Handler {
  /**
   * @param chunker  Forwarded unchanged to Paged.Handler.
   * @param polisher Forwarded unchanged to Paged.Handler.
   * @param caller   Forwarded unchanged to Paged.Handler.
   */
  constructor(chunker, polisher, caller) {
    super(chunker, polisher, caller);
  }

  /**
   * Hook invoked with the freshly laid-out page.
   *
   * @param pageFragment Element that is searched for `.pagedjs_hyphen`.
   * @param page         Object whose `endToken.offset` is read and whose
   *                     `breakToken` property is overwritten.
   * @param breakToken   Object whose `offset` is moved back by the length of
   *                     the removed word.
   */
  afterPageLayout(pageFragment, page, breakToken) {
    // Find the element that ends with the hyphenated word (queried once).
    const block = pageFragment.querySelector('.pagedjs_hyphen');
    if (!block) {
      return;
    }

    // Without both tokens there is nowhere to move the word to; bail out
    // BEFORE touching the DOM so no text is lost and no TypeError is thrown
    // halfway through the update.
    if (!breakToken || !page.endToken) {
      return;
    }

    // NOTE(review): `prevHyphen` is never assigned in this class, so this
    // stores the string "undefined" unless the base class provides it - confirm.
    block.dataset.ref = this.prevHyphen;

    const html = block.innerHTML;
    const finalWord = getFinalWord(html);
    const movedOffset = page.endToken.offset - finalWord.length;

    // NOTE(review): this stores a plain number on `page.breakToken`, kept
    // as in the original snippet - confirm that Paged.js expects this.
    page.breakToken = movedOffset;

    // Remove the LAST occurrence of the final word. A string-pattern
    // `replace` would remove the first occurrence, which deletes the wrong
    // text whenever the same word also appears earlier in the block.
    const at = html.lastIndexOf(finalWord);
    block.innerHTML = html.slice(0, at) + html.slice(at + finalWord.length);

    // Move the real break token so the next page starts with the removed word.
    breakToken.offset = movedOffset;
  }
}
|
||||||
|
Paged.registerHandlers(noHyphenBetweenPage);
|
||||||
|
|
||||||
|
/**
 * Return the last whitespace-delimited word of a string.
 *
 * Words are separated by HTML ASCII whitespace (space, tab, line feed,
 * carriage return, form feed), so a final word that follows a line break is
 * not returned together with the text of the previous line.
 *
 * @param {string} words Text (or serialized HTML) to inspect.
 * @returns {string} The trailing run of non-whitespace characters; the empty
 *   string when the input is empty or ends with whitespace.
 */
function getFinalWord(words) {
  // `*` lets the pattern match the empty string at the end of the input,
  // so `match` never returns null here.
  return words.match(/[^ \t\n\r\f]*$/)[0];
}
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
</html>
|
</html>
|
@ -49,6 +49,12 @@ def download_media(html, images, wiki):
|
|||||||
if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
|
if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
|
||||||
os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
|
os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
|
||||||
|
|
||||||
|
# tmp list for filename replacements
|
||||||
|
replaced = []
|
||||||
|
|
||||||
|
images.sort()
|
||||||
|
images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later
|
||||||
|
|
||||||
# download media files
|
# download media files
|
||||||
for filename in images:
|
for filename in images:
|
||||||
filename = filename.replace(' ', '_') # safe filenames
|
filename = filename.replace(' ', '_') # safe filenames
|
||||||
@ -81,15 +87,26 @@ def download_media(html, images, wiki):
|
|||||||
import time
|
import time
|
||||||
time.sleep(3) # do not overload the server
|
time.sleep(3) # do not overload the server
|
||||||
|
|
||||||
# replace src link
|
# replace src image link (from wiki folder structure to local folder)
|
||||||
image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
|
image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
|
||||||
matches = re.findall(rf'src="/images/.*?px-{ filename }"', html) # for debugging
|
|
||||||
if matches:
|
img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
|
||||||
html = re.sub(rf'src="/images/.*?px-{ filename }"', f'src="{ image_path }"', html)
|
for img_path_pattern in img_path_patterns:
|
||||||
else:
|
matches = re.findall(img_path_pattern, html) # for debugging
|
||||||
matches = re.findall(rf'src="/images/.*?{ filename }"', html) # for debugging
|
# print(f'{ filename }\n')
|
||||||
html = re.sub(rf'src="/images/.*?{ filename }"', f'src="{ image_path }"', html)
|
if matches:
|
||||||
# print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!
|
for match in matches:
|
||||||
|
if match not in replaced:
|
||||||
|
# print(f' { match } --> { image_path }') # for debugging: each image should have the correct match!
|
||||||
|
html = html.replace(match, image_path)
|
||||||
|
replaced.append(match)
|
||||||
|
# else:
|
||||||
|
# print(' already replaced!')
|
||||||
|
# print('\n------\n')
|
||||||
|
# break
|
||||||
|
# else:
|
||||||
|
# print(' no match!')
|
||||||
|
# print('\n------\n')
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
@ -97,37 +114,38 @@ def add_item_inventory_links(html):
|
|||||||
"""
|
"""
|
||||||
html = string (HTML)
|
html = string (HTML)
|
||||||
"""
|
"""
|
||||||
|
# THROUGHOUT THE BOOK
|
||||||
# Find all references in the text to the item index
|
# Find all references in the text to the item index
|
||||||
pattern = r'Item \d\d\d'
|
matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mention of Item ###
|
||||||
matches = re.findall(pattern, html)
|
|
||||||
index = {}
|
index = {}
|
||||||
new_html = ''
|
|
||||||
from nltk.tokenize import sent_tokenize
|
|
||||||
for line in sent_tokenize(html):
|
|
||||||
for match in matches:
|
|
||||||
if match in line:
|
|
||||||
number = match.replace('Item ', '').strip()
|
|
||||||
if not number in index:
|
|
||||||
index[number] = []
|
|
||||||
count = 1
|
|
||||||
else:
|
|
||||||
count = index[number][-1] + 1
|
|
||||||
index[number].append(count)
|
|
||||||
item_id = f'ii-{ number }-{ index[number][-1] }'
|
|
||||||
line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')
|
|
||||||
|
|
||||||
# the line is pushed back to the new_html
|
|
||||||
new_html += line + ' '
|
|
||||||
|
|
||||||
# Also add a <span> around the index nr to style it
|
|
||||||
matches = re.findall(r'<li>\d\d\d', new_html)
|
|
||||||
for match in matches:
|
for match in matches:
|
||||||
new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')
|
item_match = re.search(r'Item \d\d\d', match)
|
||||||
|
item = item_match.group()
|
||||||
|
number = item.replace('Item ', '').strip()
|
||||||
|
text = match.replace(f'Item { number }', '')
|
||||||
|
if not number in index:
|
||||||
|
index[number] = []
|
||||||
|
count = 1
|
||||||
|
else:
|
||||||
|
count = index[number][-1] + 1
|
||||||
|
index[number].append(count)
|
||||||
|
item_id = f'ii-{ number }-{ index[number][-1] }'
|
||||||
|
print(f'match: { number } --> { item_id } --> { text }')
|
||||||
|
html = html.replace(match, f'<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text }')
|
||||||
|
|
||||||
# import json
|
# IN THE ITEM INDEX
|
||||||
# print(json.dumps(index, indent=4))
|
# Also add a <span> around the index nr to style it
|
||||||
|
matches = re.findall(r'<li>\d\d\d', html)
|
||||||
|
for match in matches:
|
||||||
|
html = html.replace(match, f'<li><span class="item_nr">{ match }</span>')
|
||||||
|
|
||||||
return new_html
|
print("\n-------------\n")
|
||||||
|
print("The following items ('###') appear [#, #, ...] many times in the book:\n")
|
||||||
|
sorted_index = dict(sorted(index.items()))
|
||||||
|
print(sorted_index)
|
||||||
|
print("\n-------------\n")
|
||||||
|
|
||||||
|
return html
|
||||||
|
|
||||||
def tweaking(html):
|
def tweaking(html):
|
||||||
"""
|
"""
|
||||||
@ -157,14 +175,7 @@ def tweaking(html):
|
|||||||
html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
|
html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
|
||||||
html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
||||||
html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
||||||
# html = html.replace('trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical concerns', 'trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical concerns')
|
html = html.replace('T*fRP', 'T✶fRP')
|
||||||
# html = html.replace('trans*generational', 'trans*generational')
|
|
||||||
# html = html.replace('trans*media', 'trans✶media')
|
|
||||||
# html = html.replace('trans*disciplinary', 'trans✶disciplinary')
|
|
||||||
# html = html.replace('trans*geopolitical', 'trans✶geopolitical')
|
|
||||||
# html = html.replace('trans*activists', 'trans✶activists')
|
|
||||||
# html = html.replace('trans*expertise', 'trans✶expertise')
|
|
||||||
# html = html.replace('trans*genealogical', 'trans✶genealogical')
|
|
||||||
html = html.replace('trans*', 'trans✶')
|
html = html.replace('trans*', 'trans✶')
|
||||||
html = html.replace('Trans*', 'trans✶')
|
html = html.replace('Trans*', 'trans✶')
|
||||||
html = html.replace('(*)', '(✶)')
|
html = html.replace('(*)', '(✶)')
|
||||||
|
Loading…
Reference in New Issue
Block a user