250 lines
9.2 KiB
Python
250 lines
9.2 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
from bs4 import BeautifulSoup as bs
|
||
|
|
||
|
import os
|
||
|
|
||
|
from functions import *
|
||
|
from get_html_from_wiki import get_html_from_wiki
|
||
|
from create_cover import create_cover
|
||
|
from create_backcover import create_backcover
|
||
|
from create_zone_backcover import create_zone_backcover
|
||
|
from create_intro_text import create_intro_text
|
||
|
from create_works_text_blocks import create_works_text_block
|
||
|
from create_stories_layout import create_stories_layout
|
||
|
from create_glossary import create_glossary
|
||
|
|
||
|
# Generate the publication to PDF:
|
||
|
# $ python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf
|
||
|
|
||
|
# Add logos.pdf on last page with PDFTK
|
||
|
# $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf
|
||
|
# $ pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf
|
||
|
# $ pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf
|
||
|
|
||
|
# PDFTK in one command:
|
||
|
# $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf
|
||
|
|
||
|
# Generate a new publication in one command:
|
||
|
# For the English version:
|
||
|
# python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf && pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.publication.pdf
|
||
|
# For the French:
|
||
|
# python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.fr.txt -o data-workers.fr.pdf && pdftk data-workers.fr.pdf A=data-workers.fr.pdf cat A56 output data-workers.fr.backcover.pdf && pdftk data-workers.fr.backcover.pdf multistamp logos.pdf output data-workers.fr.backcover.logos.pdf && pdftk A=data-workers.fr.pdf B=data-workers.fr.backcover.logos.pdf cat A1-55 B output data-workers.fr.publication.pdf
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
# Set the language in functions.py
|
||
|
from functions import language
|
||
|
|
||
|
# Work from a local copy of the wiki page when one exists, so the page is
# not downloaded again on every run. The file name and the render URL both
# depend on the language imported from functions.py.
if language == 'en':
    htmlfile = 'data-workers.en.html'
    url = 'http://www.algolit.net/index.php/Data_Workers?action=render'
else:
    htmlfile = 'data-workers.fr.html'
    url = 'http://www.algolit.net/index.php/Data_Workers_FR?action=render'

# Only fetch the page when there is no local copy yet.
# NOTE(review): presumably get_html_from_wiki() writes `htmlfile` to disk;
# the read below relies on that -- confirm in get_html_from_wiki.py.
if not os.path.exists(htmlfile):
    baseurl = 'http://www.algolit.net'
    get_html_from_wiki(language, url, baseurl)

# Read through a context manager so the file handle is closed right away
# instead of being left for the garbage collector.
with open(htmlfile, 'r') as html_file:
    html = html_file.read()

soup = bs(html, 'html.parser')
|
||
|
|
||
|
# The publication is collected in one big string, `out`.
# It opens with the main cover; the cover language is derived from the
# name of the HTML file that was loaded.
language = 'en' if '.en.' in htmlfile else 'fr'
out = create_cover('data_workers', language, steps=1)

# The counters page follows directly after the cover
out += insert_counters_page()

# Buffers for content that lives outside the <section> tags
about, mundaneum, glossary = '', '', ''

# Buffers for the content of the zone that is currently being read
intro, stories, works = '', '', ''

# Flipped to True as soon as the 'zones' heading has been reached
zones = False
|
||
|
|
||
|
# Loop through all the top-level elements of the page and dispatch on the
# tag name:
#   <h2>       starts one of the main parts (about/mundaneum/zones/glossary)
#   <h3>       starts a new zone: append its cover and zone backcover
#   <section>  holds the stories and works of a zone
#   other      loose text, routed to whichever buffer is currently active
#
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- confirm it against the repository copy, in
# particular whether the `if zones:` block of the <section> branch belongs
# inside the 'group' check.
for element in soup.body.section.contents:

    print('\n---')
    print(element.name)

    if element.name == 'h2':
        print('<h2> (about/mundaneum/zones/glossary)')
        if 'about' in element.text.lower() or 'propos' in element.text.lower():
            about += element.text.upper() + '\n'

        elif 'mundaneum' in element.text.lower():
            mundaneum += element.text.upper() + '\n'

        elif 'zones' in element.text.lower():

            # Append about and mundaneum text
            # as soon as the 'zones' section starts
            about_txt = insert_linebreaks(about, 65, type='word', double_linebreaks=True)
            mundaneum_txt = insert_linebreaks(mundaneum, 40, type='word', double_linebreaks=True)
            about_and_mundaneum = insert_text_block(about_txt, mundaneum_txt, 70, 40)
            # about_and_mundaneum = create_header(about_and_mundaneum)
            out += fill_page(about_and_mundaneum)
            print('==> Appended about + mundaneum')
            # Falsy from here on, so the fallback branch at the bottom of
            # the loop stops routing loose text into these two buffers
            about = False
            mundaneum = False

            # Also append the stories text
            # as soon as the 'zones' section starts
            if stories:
                stories_txt = create_stories_layout(stories)
                print('>>> create_stories_layout length:', len(stories_txt.split('\n')))
                pages = fill_page(stories_txt)
                out += pages
                print('==> Appended stories (Algolit)')
                stories = ''

            zones = True

        elif 'glossary' in element.text.lower() or 'glossaire' in element.text.lower():
            glossary += add_headers('glossary', element)
            zones = False

    elif element.name == 'h3':
        print('<h3> (writers/oracles/cleaners/informants/readers/learners)')
        zone = element.text
        print('-->', zone)

        # Add extra pages, to make sure the new zones always start on the right page
        if zone.lower() == 'oracles':
            out += insert_counters_page()
        if zone.lower() == 'readers' or zone.lower() == 'lecteurs':
            out += insert_counters_page()

        # Add zone cover
        cover = create_cover(zone.lower(), language, steps=1)
        out += cover
        print('==> Appended cover:', zone)

        # Add backcover analysis for zone
        zone_backcover = create_zone_backcover(zone, language, out)
        zone_backcover = fill_page(zone_backcover)
        # Report the length of the page that was just filled. This used to
        # read `pages`, which is unbound here unless stories or works were
        # rendered earlier, and stale when it is bound.
        print('>>> fill_page length:', len(zone_backcover.split('\n')))
        out += zone_backcover
        print('==> Appended zone_backcover')

    # Insert Works or Stories sections
    elif element.name == 'section':
        print('<SECTION>')
        if 'group' in element['class'][-1]:
            for section_element in element.children:
                section_classes = section_element['class']
                # The French marker is looked up in the second class only;
                # the length check avoids an IndexError on elements that
                # carry a single class.
                is_stories = 'stories' in section_classes or (
                    len(section_classes) > 1 and 'récits' in section_classes[1]
                )
                if is_stories:
                    print('--> Stories')
                    for child_element in section_element.children:
                        print('---->', child_element.name)
                        stories += add_headers('stories', child_element)
                elif 'works' in section_classes:
                    print('--> Works')
                    for child_element in section_element.children:
                        print('---->', child_element.name)
                        works += add_headers('works', child_element)

        # print('intro:', intro)
        # print('stories:', stories)
        # print('works:', works)

        if zones:

            if intro:
                intro_txt = create_intro_text(intro, zone)
                intro = ''
            else:
                intro_txt = ''

            if works:
                works_txt = create_works_text_block(intro_txt, works)
                pages = fill_page(works_txt)

                # Insert symbol backgrounds, one chunk of 69 lines at a time.
                # NOTE(review): 69 is presumably the number of lines per
                # page produced by fill_page(); lines left over after the
                # last full chunk are never appended -- confirm that
                # fill_page() always pads to whole pages.
                lines = ''
                for i, line in enumerate(pages.split('\n')):
                    lines += line + '\n'
                    line_number = i + 1
                    if line_number % 69 == 0:
                        out += insert_symbol_background(lines, 110, ['%', '%', '%', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '], 1)
                        lines = ''

                print('==> Appended intro + works')
                works = ''

            if stories:
                stories_txt = create_stories_layout(stories)
                print('>>> create_stories_layout length:', len(stories_txt.split('\n')))
                pages = fill_page(stories_txt)
                print('>>> fill_page length:', len(pages.split('\n')))
                out += pages
                print('==> Appended stories')
                stories = ''

        print('</SECTION>')

    else:
        try:
            if zones:
                # Append introduction text here,
                # when the zones section starts
                # (as this text is written outside a section)
                intro += add_headers('intro', element)
            elif glossary:
                glossary += add_headers('glossary', element)
            elif mundaneum:
                mundaneum += check_element(element)
            elif about:
                about += check_element(element)
        except Exception:
            # Best effort: elements without usable content are skipped.
            # Catching Exception rather than everything keeps Ctrl-C and
            # sys.exit() working.
            print('no content >>>', element.name)
|
||
|
|
||
|
# try:
|
||
|
# if element.name != 'section':
|
||
|
# print(element.text)
|
||
|
# except:
|
||
|
# continue
|
||
|
|
||
|
# The French edition gets one extra placeholder/fill-up page
# in front of the glossary
if language == 'fr':
    out = out + insert_counters_page()

# Glossary: lay it out, pad it with fill_page() and append it
# out += create_glossary(glossary)
out = out + fill_page(create_glossary(glossary))
print('==> Appended glossary')

# Backcover closes the publication
out = out + create_backcover()
print('==> Appended backcover')

# Page numbers are added last, over the complete text
out = insert_pagenumbers(out)
|
||
|
|
||
|
# Special Effects
|
||
|
# out = insert_symbol_background(out, 110, [' '], 1)
|
||
|
# out = insert_symbol_background(out, 110, ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', ' '], 1)
|
||
|
# out = insert_symbol_background(out, 110, ['&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', ' '], 1, inverted=True)
|
||
|
|
||
|
# Write the publication next to the HTML source, under the same name with
# a .txt extension (data-workers.en.html -> data-workers.en.txt).
out_filename = htmlfile.replace('.html', '.txt')

# The context manager flushes and closes the file before the script
# reports success; the handle used to be left open until interpreter exit.
with open(out_filename, 'w') as out_file:
    out_file.write(out)
print('*{} written*'.format(out_filename))
|
||
|
|