sync
This commit is contained in:
parent
39b3220444
commit
3f8ccedc8c
@ -85,8 +85,8 @@ body{
|
||||
letter-spacing: 0.01em;
|
||||
/*hyphens: auto;*/
|
||||
/*-webkit-hyphenate-limit-chars: 8 2 4;*/ /* word length, minimum number of characters before and after the hyphen -- does not work in chrome */
|
||||
orphans: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||
widows: unset; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||
orphans: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||
widows: 0; /* Default is 2. These might trigger disappearing sentences at pagebreak? https://mattermost.pagedmedia.org/pagedmedia/pl/xsetebgcbbddzggwbbn93e5k1a */
|
||||
}
|
||||
|
||||
@page{
|
||||
@ -433,10 +433,9 @@ div.item_index{
|
||||
text-indent: -7mm;
|
||||
margin-left: 7mm;
|
||||
}
|
||||
div.item_index li a::after{
|
||||
content: ", " target-counter(attr(href), page);
|
||||
div.item_index li span.item-refs a::after{
|
||||
content: target-counter(attr(href), page);
|
||||
font-weight: bold;
|
||||
margin-left: -0.15em; /* To remove the weird gaps between inline-blocks. This seems to be a general problem in HTML5/CSS3 */
|
||||
}
|
||||
div.item_index li span.item_nr{
|
||||
width: 10mm;
|
||||
@ -614,6 +613,10 @@ p{
|
||||
div.contribution.ultrasonic-dreams div.title-wrapper + p + p > b{
|
||||
margin-left: -7mm;
|
||||
}
|
||||
div.force-text-indent {
|
||||
display: block;
|
||||
text-indent: var(--first-indent) !important;
|
||||
}
|
||||
|
||||
ul{
|
||||
margin: 18px 0 !important;
|
||||
@ -652,11 +655,24 @@ ul > li {
|
||||
div.list-without-markers ul > li{
|
||||
text-indent: -1em !important;
|
||||
}
|
||||
div.list-without-markers ul > li:before{
|
||||
div.list-without-markers ul > li::before{
|
||||
content: none !important;
|
||||
margin-left: unset;
|
||||
margin-right: unset;
|
||||
}
|
||||
div.contribution div.list-with-custom-markers ul > li{
|
||||
text-indent: 0 !important;
|
||||
}
|
||||
div.contribution div.list-with-custom-markers ul > li::before{
|
||||
content: none !important;
|
||||
margin-left: unset;
|
||||
margin-right: unset;
|
||||
}
|
||||
div.contribution div.list-with-custom-markers ul > li > span.custom-marker{
|
||||
display: inline-block;
|
||||
text-indent: -0.85em !important;
|
||||
margin-left: -0.25em;
|
||||
}
|
||||
|
||||
/* notes (ol.references) and references (ul.references) */
|
||||
ol.references,
|
||||
|
@ -6,6 +6,7 @@
|
||||
<link href="./css/print.css" rel="stylesheet" type="text/css" media="print">
|
||||
<!-- <link href="./css/baseline.css" rel="stylesheet" type="text/css" media="print"> -->
|
||||
<script>
|
||||
// Thank you paged.js team for the Hyphenopoly tip!
|
||||
// config for hyphenopoly
|
||||
var Hyphenopoly = {
|
||||
require: {
|
||||
@ -39,9 +40,38 @@
|
||||
<script src="./js/runHyphens.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="wrapper">
|
||||
{{ publication_unfolded }}
|
||||
</div>
|
||||
</body>
|
||||
<div id="wrapper">
|
||||
{{ publication_unfolded }}
|
||||
</div>
|
||||
<script>
|
||||
// With many thanks to Julien Taquet for digging into Paged.js
|
||||
// to find a way to remove hyphenated words on page breaks!!
|
||||
/**
 * Paged.js handler that avoids leaving a hyphenated word fragment at the
 * bottom of a page.
 *
 * After each page is laid out, it looks for an element carrying the
 * `pagedjs_hyphen` class inside the page. If one is found, the last
 * space-delimited word of that element is removed from the page and the
 * break offsets are moved back by the length of that word.
 *
 * Relies on the global `Paged` object and on the `getFinalWord` helper
 * defined elsewhere in the same script.
 */
class noHyphenBetweenPage extends Paged.Handler {
  /**
   * The arguments are passed through unchanged to Paged.Handler.
   */
  constructor(chunker, polisher, caller) {
    super(chunker, polisher, caller);
  }

  /**
   * Hook run after a page has been laid out.
   *
   * @param {Element} pageFragment - rendered page; searched for `.pagedjs_hyphen`.
   * @param {object} page - page object; `page.endToken.offset` is read and
   *   `page.breakToken` is written.
   * @param {object} [breakToken] - break token whose `offset` is moved back;
   *   ignored when it is missing.
   */
  afterPageLayout(pageFragment, page, breakToken) {
    // Query once; bail out early when the page has no hyphenated block.
    const block = pageFragment.querySelector('.pagedjs_hyphen');
    if (!block) {
      return;
    }

    // NOTE(review): `prevHyphen` is never assigned in this class, so this
    // stores the string "undefined" in data-ref. Kept as-is to preserve the
    // existing behaviour -- confirm whether it is intentional.
    block.dataset.ref = this.prevHyphen;

    // Find the final word once, so the measured length and the removed text
    // always refer to the same string.
    const html = block.innerHTML;
    const offsetMove = getFinalWord(html).length;
    const movedOffset = page.endToken.offset - offsetMove;

    // Move the token accordingly.
    page.breakToken = movedOffset;

    // Remove the last word. The final word is by definition the tail of the
    // markup, so cut it from the end. String.replace() with a string pattern
    // would remove the FIRST occurrence, which is the wrong one whenever the
    // same word also appears earlier in the block.
    block.innerHTML = html.slice(0, html.length - offsetMove);

    // Guard against a missing break token instead of throwing a TypeError.
    if (breakToken) {
      breakToken.offset = movedOffset;
    }
  }
}
|
||||
// Hand the handler class to Paged.js through its registerHandlers() entry point.
Paged.registerHandlers(noHyphenBetweenPage);
|
||||
|
||||
/**
 * Return the last space-delimited token of a string.
 *
 * Only the literal space character (" ") is treated as a separator. A string
 * without spaces is returned whole, and a string ending in a space yields "".
 *
 * @param {string} words - text to inspect.
 * @returns {string} the text after the last space.
 */
function getFinalWord(words) {
  const tokens = words.split(" ");
  return tokens.pop();
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -49,6 +49,12 @@ def download_media(html, images, wiki):
|
||||
if not os.path.exists(f'{ STATIC_FOLDER_PATH }/images'):
|
||||
os.makedirs(f'{ STATIC_FOLDER_PATH }/images')
|
||||
|
||||
# tmp list for filename replacements
|
||||
replaced = []
|
||||
|
||||
images.sort()
|
||||
images.reverse() # reverse to make sure that 01.png does not override Image01.png in the filename replacements later
|
||||
|
||||
# download media files
|
||||
for filename in images:
|
||||
filename = filename.replace(' ', '_') # safe filenames
|
||||
@ -81,15 +87,26 @@ def download_media(html, images, wiki):
|
||||
import time
|
||||
time.sleep(3) # do not overload the server
|
||||
|
||||
# replace src link
|
||||
# replace src image link (from wiki folder structure to local folder)
|
||||
image_path = f'{ PUBLIC_STATIC_FOLDER_PATH }/images/{ filename }' # here the images need to link to the / of the domain, for flask :/// confusing! this breaks the whole idea to still be able to make a local copy of the file
|
||||
matches = re.findall(rf'src="/images/.*?px-{ filename }"', html) # for debugging
|
||||
if matches:
|
||||
html = re.sub(rf'src="/images/.*?px-{ filename }"', f'src="{ image_path }"', html)
|
||||
else:
|
||||
matches = re.findall(rf'src="/images/.*?{ filename }"', html) # for debugging
|
||||
html = re.sub(rf'src="/images/.*?{ filename }"', f'src="{ image_path }"', html)
|
||||
# print(f'{filename}: {matches}\n------') # for debugging: each image should have the correct match!
|
||||
|
||||
img_path_patterns = [rf'(?<!\.)/images/.*?px-{ filename }', rf'(?<!\.)/images/.*?{ filename }']
|
||||
for img_path_pattern in img_path_patterns:
|
||||
matches = re.findall(img_path_pattern, html) # for debugging
|
||||
# print(f'{ filename }\n')
|
||||
if matches:
|
||||
for match in matches:
|
||||
if match not in replaced:
|
||||
# print(f' { match } --> { image_path }') # for debugging: each image should have the correct match!
|
||||
html = html.replace(match, image_path)
|
||||
replaced.append(match)
|
||||
# else:
|
||||
# print(' already replaced!')
|
||||
# print('\n------\n')
|
||||
# break
|
||||
# else:
|
||||
# print(' no match!')
|
||||
# print('\n------\n')
|
||||
|
||||
return html
|
||||
|
||||
@ -97,37 +114,38 @@ def add_item_inventory_links(html):
|
||||
"""
|
||||
html = string (HTML)
|
||||
"""
|
||||
# THROUGHOUT THE BOOK
|
||||
# Find all references in the text to the item index
|
||||
pattern = r'Item \d\d\d'
|
||||
matches = re.findall(pattern, html)
|
||||
matches = re.findall(r'\w.*?Item \d\d\d.*?\w\w\w', html) # Dodgy attempt to find unique patterns for each mentioning of Item ###
|
||||
index = {}
|
||||
new_html = ''
|
||||
from nltk.tokenize import sent_tokenize
|
||||
for line in sent_tokenize(html):
|
||||
for match in matches:
|
||||
if match in line:
|
||||
number = match.replace('Item ', '').strip()
|
||||
if not number in index:
|
||||
index[number] = []
|
||||
count = 1
|
||||
else:
|
||||
count = index[number][-1] + 1
|
||||
index[number].append(count)
|
||||
item_id = f'ii-{ number }-{ index[number][-1] }'
|
||||
line = line.replace(match, f'Item <a id="{ item_id }" href="#Item_Index">{ number }</a>')
|
||||
|
||||
# the line is pushed back to the new_html
|
||||
new_html += line + ' '
|
||||
|
||||
# Also add a <span> around the index nr to style it
|
||||
matches = re.findall(r'<li>\d\d\d', new_html)
|
||||
for match in matches:
|
||||
new_html = new_html.replace(match, f'<li><span class="item_nr">{ match }</span>')
|
||||
item_match = re.search(r'Item \d\d\d', match)
|
||||
item = item_match.group()
|
||||
number = item.replace('Item ', '').strip()
|
||||
text = match.replace(f'Item { number }', '')
|
||||
if not number in index:
|
||||
index[number] = []
|
||||
count = 1
|
||||
else:
|
||||
count = index[number][-1] + 1
|
||||
index[number].append(count)
|
||||
item_id = f'ii-{ number }-{ index[number][-1] }'
|
||||
print(f'match: { number } --> { item_id } --> { text }')
|
||||
html = html.replace(match, f'<a id="{ item_id }" href="#Item_Index">Item { number }</a>{ text }')
|
||||
|
||||
# import json
|
||||
# print(json.dumps(index, indent=4))
|
||||
# IN THE ITEM INDEX
|
||||
# Also add a <span> around the index nr to style it
|
||||
matches = re.findall(r'<li>\d\d\d', html)
|
||||
for match in matches:
|
||||
html = html.replace(match, f'<li><span class="item_nr">{ match }</span>')
|
||||
|
||||
print("\n-------------\n")
|
||||
print("The following items ('###') appear [#, #, ...] many times in the book:\n")
|
||||
sorted_index = dict(sorted(index.items()))
|
||||
print(sorted_index)
|
||||
print("\n-------------\n")
|
||||
|
||||
return new_html
|
||||
return html
|
||||
|
||||
def tweaking(html):
|
||||
"""
|
||||
@ -157,14 +175,7 @@ def tweaking(html):
|
||||
html = html.replace('<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities: Accidented and dissonant spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>', '<h1><span class="mw-headline" id="Depths_and_Densities:_Accidented_and_dissonant_spacetimes"><a href="#Depths_and_densities" title="Depths and densities">Depths and Densities:<br>Accidented<br>and dissonant<br>spacetimes</a></span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h1>')
|
||||
html = html.replace('<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions: a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Open_Boundary_Conditions:_a_grid_for_intensive_study">Open Boundary Conditions:<br>a grid for intensive study</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
||||
html = html.replace('<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities: A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>', '<h2><span class="mw-headline" id="Depths_and_Densities:_A_Bugged_Report">Depths and Densities:<br>A Bugged Report</span><span class="mw-editsection"><span class="mw-editsection-bracket"></span></span></h2>')
|
||||
# html = html.replace('trans*generational, trans*media, trans*disciplinary, trans*geopolitical, trans*expertise, and trans*genealogical concerns', 'trans✶generational, trans✶media, trans✶disciplinary, trans✶geopolitical, trans✶expertise, and trans✶genealogical concerns')
|
||||
# html = html.replace('trans*generational', 'trans*generational')
|
||||
# html = html.replace('trans*media', 'trans✶media')
|
||||
# html = html.replace('trans*disciplinary', 'trans✶disciplinary')
|
||||
# html = html.replace('trans*geopolitical', 'trans✶geopolitical')
|
||||
# html = html.replace('trans*activists', 'trans✶activists')
|
||||
# html = html.replace('trans*expertise', 'trans✶expertise')
|
||||
# html = html.replace('trans*genealogical', 'trans✶genealogical')
|
||||
html = html.replace('T*fRP', 'T✶fRP')
|
||||
html = html.replace('trans*', 'trans✶')
|
||||
html = html.replace('Trans*', 'trans✶')
|
||||
html = html.replace('(*)', '(✶)')
|
||||
|
Loading…
Reference in New Issue
Block a user