2019-03-25 08:35:09 +01:00
#!/usr/bin/env python3
from bs4 import BeautifulSoup as bs
import os
from functions import *
from get_html_from_wiki import get_html_from_wiki
from create_cover import create_cover
from create_backcover import create_backcover
from create_zone_backcover import create_zone_backcover
from create_intro_text import create_intro_text
from create_works_text_blocks import create_works_text_block
from create_stories_layout import create_stories_layout
from create_glossary import create_glossary
# Generate the publication to PDF:
# $ python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf
# Add logos.pdf on last page with PDFTK
# $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf
# $ pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf
# $ pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf
# PDFTK in one command:
# $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf
# Generate a new publication in one command:
# For the English version:
# python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf && pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.backcover.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.publication.pdf
# For the French:
# python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.fr.txt -o data-workers.fr.pdf && pdftk data-workers.fr.pdf A=data-workers.fr.pdf cat A56 output data-workers.fr.backcover.pdf && pdftk data-workers.fr.backcover.pdf multistamp logos.pdf output data-workers.fr.backcover.logos.pdf && pdftk A=data-workers.fr.pdf B=data-workers.fr.backcover.logos.pdf cat A1-55 B output data-workers.fr.publication.pdf
# Set the language in functions.py
from functions import language
# Pick the local html copy and the wiki url that belong to the language
# set in functions.py. The local copy avoids downloading a new page on
# every run.
if language == 'en':
    htmlfile = 'data-workers.en.html'
    url = 'http://www.algolit.net/index.php/Data_Workers?action=render'
else:
    # Any language other than 'en' falls back to the French edition.
    htmlfile = 'data-workers.fr.html'
    url = 'http://www.algolit.net/index.php/Data_Workers_FR?action=render'
# Download the page only when there is no local html copy yet.
baseurl = 'http://www.algolit.net'
if not os.path.exists(htmlfile):
    get_html_from_wiki(language, url, baseurl)

# Read the local copy through a context manager so the file handle is
# closed as soon as the content is in memory.
with open(htmlfile, 'r') as html_source:
    html = html_source.read()
soup = bs(html, 'html.parser')
# Main string where content is collected
out = ''

# Insert cover.
# The language is derived from the html filename, so it always matches
# the file that was actually parsed.
language = 'en' if '.en.' in htmlfile else 'fr'
out += create_cover('data_workers', language, steps=1)

# Insert counters page
out += sinus_jj()
# Temporary buffers for content that sits outside the section tags.
# Each one starts out as an empty string.
about = mundaneum = glossary = ''
intro = stories = works = ''

# Enable this once the zones have started
zones = False
# Loop through all the elements of the page and append the finished
# pages to `out`:
#   <h2>      switches between about / mundaneum / zones / glossary
#   <h3>      starts a new zone (cover + zone backcover)
#   <section> collects the stories and works of a zone
#   others    are added to whichever buffer is currently active
# NOTE(review): the nesting below was reconstructed from the statement
# order and the inline comments -- confirm against a generated publication.
for element in soup.body.section.contents:

    if element.name == 'h2':
        print('<h2> (about/mundaneum/zones/glossary)')
        heading = element.text.lower()

        if 'about' in heading or 'propos' in heading:
            about += element.text.upper() + '\n'

        elif 'mundaneum' in heading:
            mundaneum += element.text.upper() + '\n'

        elif 'zones' in heading:
            # Append about and mundaneum text
            # as soon as the 'zones' section starts
            about_txt = insert_linebreaks(about, 65, type='word', double_linebreaks=True)
            mundaneum_txt = insert_linebreaks(mundaneum, 40, type='word', double_linebreaks=True)
            about_and_mundaneum = insert_text_block(about_txt, mundaneum_txt, 70, 40)
            # about_and_mundaneum = create_header(about_and_mundaneum)
            out += fill_page(about_and_mundaneum)
            print('==> Appended about + mundaneum')
            # Falsy values stop the fallback branch at the bottom of the
            # loop from adding more text to these buffers.
            about = False
            mundaneum = False

            # Also append the stories text
            # as soon as the 'zones' section starts
            if stories:
                stories_txt = create_stories_layout(stories)
                print('>>> create_stories_layout length:', len(stories_txt.split('\n')))
                pages = fill_page(stories_txt)
                out += pages
                print('==> Appended stories (Algolit)')
                stories = ''

            zones = True

        elif 'glossary' in heading or 'glossaire' in heading:
            glossary += add_headers('glossary', element)
            zones = False

    elif element.name == 'h3':
        print('<h3> (writers/oracles/cleaners/informants/readers/learners)')
        zone = element.text
        zone_key = zone.lower()
        print('-->', zone)

        # Add extra pages, to make sure the new zones always start on the right page
        if zone_key == 'oracles':
            out += sinus_jj()
        if zone_key == 'readers' or zone_key == 'lecteurs':
            out += sinus_jj()

        # Add zone cover
        cover = create_cover(zone_key, language, steps=1)
        out += cover
        print('==> Appended cover:', zone)

        # Add backcover analysis for zone
        zone_backcover = create_zone_backcover(zone, language, out)
        zone_backcover = fill_page(zone_backcover)
        # Report the page that was just filled; `pages` may be stale or
        # not defined at all at this point.
        print('>>> fill_page length:', len(zone_backcover.split('\n')))
        out += zone_backcover
        print('==> Appended zone_backcover')

    # Insert Works or Stories sections
    elif element.name == 'section':
        if 'group' in element['class'][-1]:
            for section_element in element.children:
                if 'stories' in section_element['class'] or 'récits' in section_element['class'][1]:
                    print('--> Stories')
                    for child_element in section_element.children:
                        print('---->', child_element.name)
                        stories += add_headers('stories', child_element)
                elif 'works' in section_element['class']:
                    print('--> Works')
                    for child_element in section_element.children:
                        print('---->', child_element.name)
                        works += add_headers('works', child_element)

            # print('intro:', intro)
            # print('stories:', stories)
            # print('works:', works)

            if zones:
                # The intro text is used once, together with the works
                if intro:
                    intro_txt = create_intro_text(intro, zone)
                    intro = ''
                else:
                    intro_txt = ''

                if works:
                    works_txt = create_works_text_block(intro_txt, works)
                    pages = fill_page(works_txt)

                    # Insert symbol backgrounds: collect 69 lines at a
                    # time and append each full group to `out`.
                    lines = ''
                    for i, line in enumerate(pages.split('\n')):
                        lines += line + '\n'
                        line_number = i + 1
                        if line_number % 69 == 0:
                            out += insert_symbol_background(lines, 110, ['%', '%', '%', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '], 1)
                            lines = ''
                    print('==> Appended intro + works')
                    works = ''

                if stories:
                    stories_txt = create_stories_layout(stories)
                    print('>>> create_stories_layout length:', len(stories_txt.split('\n')))
                    pages = fill_page(stories_txt)
                    print('>>> fill_page length:', len(pages.split('\n')))
                    out += pages
                    print('==> Appended stories')
                    stories = ''

    else:
        if zones:
            # Append introduction text here,
            # when the zones section starts
            # (as this text is written outside a section)
            intro += add_headers('intro', element)
        elif glossary:
            glossary += add_headers('glossary', element)
        elif mundaneum:
            mundaneum += check_element(element)
        elif about:
            about += check_element(element)
        else:
            print('no content >>>', element.name)

    # try:
    #     if element.name != 'section':
    #         print(element.text)
    # except:
    #     continue
# The French edition gets one extra placeholder/fillup page
if language == 'fr':
    out = out + insert_counters_page()

# Glossary: lay it out first, then fill it up to whole pages
glossary_txt = create_glossary(glossary)
# out += glossary_txt
out = out + fill_page(glossary_txt)
print('==> Appended glossary')

# Backcover closes the publication
out = out + create_backcover()
print('==> Appended backcover')

# Page numbers are added last, over the complete text
out = insert_pagenumbers(out)
# Special Effects
# out = insert_symbol_background(out, 110, [' '], 1)
# out = insert_symbol_background(out, 110, ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', ' '], 1)
# out = insert_symbol_background(out, 110, ['&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', ' '], 1, inverted=True)
# Write the collected publication next to the html source
# (data-workers.<lang>.html -> data-workers.<lang>.txt).
out_filename = '{}'.format(htmlfile.replace('.html', '.txt'))
# Opening in 'w+' mode truncates the file, so the text has to be written
# here; the context manager flushes and closes the file afterwards.
with open(out_filename, 'w+') as out_file:
    out_file.write(out)
print('*{} written*'.format(out_filename))