#! /etc/bin/python3 from bs4 import BeautifulSoup as bs import os from functions import * from get_html_from_wiki import get_html_from_wiki from create_cover import create_cover from create_backcover import create_backcover from create_zone_backcover import create_zone_backcover from create_intro_text import create_intro_text from create_works_text_blocks import create_works_text_block from create_stories_layout import create_stories_layout from create_glossary import create_glossary # Generate the publication to PDF: # $ python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf # Add logos.pdf on last page with PDFTK # $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf # $ pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.logos.pdf # $ pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf # PDFTK in one command: # $ pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.logos.pdf # Generate a new publication in one command: # For the English version: # python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.en.txt -o data-workers.en.pdf && pdftk data-workers.en.pdf A=data-workers.en.pdf cat A52 output data-workers.en.backcover.pdf && pdftk data-workers.en.backcover.pdf multistamp logos.pdf output data-workers.en.logos.pdf && pdftk A=data-workers.en.pdf B=data-workers.en.backcover.logos.pdf cat A1-51 B output data-workers.en.publication.pdf # For the French: # python3 create_all.py && python3 txt2pdf/txt2pdf.py -m A4 -f fonts/unifont-11.0.03.ttf -s 9 -v 0.05 -T 1 -B 0.9 -L 1.6 -R 1.4 data-workers.fr.txt -o data-workers.fr.pdf && pdftk data-workers.fr.pdf A=data-workers.fr.pdf cat A56 output data-workers.fr.backcover.pdf && pdftk data-workers.fr.backcover.pdf multistamp logos.pdf output data-workers.fr.logos.pdf && pdftk A=data-workers.fr.pdf B=data-workers.fr.backcover.logos.pdf cat A1-55 B output data-workers.fr.publication.pdf # Set the language in functions.py from functions import language # To work with a local html file # (and not download a new one all the time) if language == 'en': htmlfile = 'data-workers.en.html' url = 'http://www.algolit.net/index.php/Data_Workers?action=render' else: htmlfile = 'data-workers.fr.html' url = 'http://www.algolit.net/index.php/Data_Workers_FR?action=render' if not os.path.exists(htmlfile): baseurl = 'http://www.algolit.net' get_html_from_wiki(language, url, baseurl) html = open(htmlfile, 'r').read() soup = bs(html, 'html.parser') # Main string where content is collected out = '' # Insert cover if '.en.' in htmlfile: language = 'en' else: language = 'fr' out += create_cover('data_workers', language, steps=1) # Insert counters page out += sinus_jj() # Tmp elements to save content that is outside the section tags about = '' mundaneum = '' glossary = '' intro = '' stories = '' works = '' # Enable this once the zones have started zones = False # Loop through all the elements of the page for element in soup.body.section.contents: print('\n---') print(element.name) if element.name == 'h2': print('

(about/mundaneum/zones/glossary)') if 'about' in element.text.lower() or 'propos' in element.text.lower(): about += element.text.upper() + '\n' elif 'mundaneum' in element.text.lower(): mundaneum += element.text.upper() + '\n' elif 'zones' in element.text.lower(): # Append about and mundaneum text # as soon as the 'zones' section starts about_txt = insert_linebreaks(about, 65, type='word', double_linebreaks=True) mundaneum_txt = insert_linebreaks(mundaneum, 40, type='word', double_linebreaks=True) about_and_mundaneum = insert_text_block(about_txt, mundaneum_txt, 70, 40) # about_and_mundaneum = create_header(about_and_mundaneum) out += fill_page(about_and_mundaneum) print('==> Appended about + mundaneum') about = False mundaneum = False # Also append the stories text # as soon as the 'zones' section starts if stories: stories_txt = create_stories_layout(stories) print('>>> create_stories_layout length:', len(stories_txt.split('\n'))) pages = fill_page(stories_txt) out += pages print('==> Appended stories (Algolit)') stories = '' zones = True elif 'glossary' in element.text.lower() or 'glossaire' in element.text.lower(): glossary += add_headers('glossary', element) zones = False elif element.name == 'h3': print('

(writers/oracles/cleaners/informants/readers/learners)') zone = element.text print('-->', zone) # Add extra pages, to make sure the new zones always start on the right page if zone.lower() == 'oracles': out += sinus_jj() if zone.lower() == 'readers' or zone.lower() == 'lecteurs': out += sinus_jj() # Add zone cover cover = create_cover(zone.lower(), language, steps=1) out += cover print('==> Appended cover:', zone) # Add backcover analysis for zone zone_backcover = create_zone_backcover(zone, language, out) zone_backcover = fill_page(zone_backcover) print('>>> fill_page length:', len(pages.split('\n'))) out += zone_backcover print('==> Appended zone_backcover') # Insert Works or Stories sections elif element.name == 'section': print('
') if 'group' in element['class'][-1]: for section_element in element.children: if 'stories' in section_element['class'] or 'récits' in section_element['class'][1]: print('--> Stories') for child_element in section_element.children: print('---->', child_element.name) stories += add_headers('stories', child_element) elif 'works' in section_element['class']: print('--> Works') for child_element in section_element.children: print('---->', child_element.name) works += add_headers('works', child_element) # print('intro:', intro) # print('stories:', stories) # print('works:', works) if zones == True: if intro: intro_txt = create_intro_text(intro, zone) intro = '' else: intro_txt = '' if works: works_txt = create_works_text_block(intro_txt, works) pages = fill_page(works_txt) # Insert symbol backgrounds lines = '' for i, line in enumerate(pages.split('\n')): lines += line + '\n' line_number = i + 1 if line_number % 69 == 0: out += insert_symbol_background(lines, 110, ['%', '%', '%', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '], 1) lines = '' print('==> Appended intro + works') works = '' if stories: stories_txt = create_stories_layout(stories) print('>>> create_stories_layout length:', len(stories_txt.split('\n'))) pages = fill_page(stories_txt) print('>>> fill_page length:', len(pages.split('\n'))) out += pages print('==> Appended stories') stories = '' print('
') else: try: if zones == True: # Append introduction text here, # when the zones section starts # (as this text is written outside a section) intro += add_headers('intro', element) elif glossary: glossary += add_headers('glossary', element) elif mundaneum: mundaneum += check_element(element) elif about: about += check_element(element) except: print('no content >>>', element.name) # try: # if element.name != 'section': # print(element.text) # except: # continue # Insert placeholder/fillup page if language == 'fr': out += insert_counters_page() # Append glossary glossary_txt = create_glossary(glossary) # out += glossary_txt out += fill_page(glossary_txt) print('==> Appended glossary') # Append backcover out += create_backcover() print('==> Appended backcover') out = insert_pagenumbers(out) # Special Effects # out = insert_symbol_background(out, 110, [' '], 1) # out = insert_symbol_background(out, 110, ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', '%', ' '], 1) # out = insert_symbol_background(out, 110, ['&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', '&', ' '], 1, inverted=True) out_filename = '{}'.format(htmlfile.replace('.html', '.txt')) out_file = open(out_filename, 'w+') out_file.write(out) print('*{} written*'.format(out_filename))