#! /etc/bin/python3
"""Plain-text layout helpers.

Line wrapping, padding to 70-line pages, two-column insertion, figlet
headers and a few generated character patterns.
"""

import random
import re
import subprocess
import textwrap
from math import sin

import nltk
from hyphen import Hyphenator
from nltk.tokenize import RegexpTokenizer
from textwrap2 import fill

tokenizer = RegexpTokenizer(r'[\s\W\w]\w+[\s\W\w\.]|^\w+|\w+$')  # initialize tokenizer

language = 'fr'
# language = 'en'

# Hyphenation dictionary used for each supported value of ``language``.
_HYPHENATOR_LOCALES = {'en': 'en_US', 'fr': 'fr_FR'}

# Characters replaced before rendering with figlet (not all fonts include them).
_FIGLET_SWAPS = str.maketrans({
    '–': '-',
    '“': '"',
    '”': '"',
    'ù': 'u',
    '’': "'",
    'à': 'a',
    'â': 'a',
    'é': 'e',
    'è': 'e',
    'î': 'i',
})

# NOTE(review): the line layout of this banner was lost when the file's
# whitespace was collapsed; the characters are kept in their original order
# on a single line. Restore the intended line breaks/spacing if known.
_GLOSSARY_BANNER = (
    '░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ {} ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░\n'
)


def selfwritten_linebreaks(string, linelength):
    """Break *string* into lines without using a hyphenator.

    The text is split with the module-level ``tokenizer``; a string that
    contains 'http' is not tokenized and is walked character by character
    instead. Pieces are appended to the current line while a running count
    stays below *linelength*. The piece that reaches the limit is still
    appended before the break, so a line may exceed *linelength*.

    Returns the text with '\\n' inserted at the breaks.
    """
    if 'http' not in string:
        string = tokenizer.tokenize(string)
    last_index = len(string) - 1
    finished = []
    current = ''
    count = 1
    for index, word in enumerate(string):
        count += len(word)
        if current == '':
            # A line should not start with a space.
            if word.startswith(' '):
                word = word[1:]
            if word == ' ':
                continue
        current += word
        if index == last_index:
            finished.append(current)
        elif count >= linelength:
            finished.append(current + '\n')
            current = ''
            count = 1
    return ''.join(finished)


def insert_linebreaks(string, linelength, type='character', double_linebreaks=False):
    """Wrap *string* to *linelength* using one of three strategies.

    type='word'       wrap each paragraph with ``textwrap2.fill`` and a
                      hyphenator chosen from the global ``language``; if the
                      hyphenator cannot be created or fails, fall back to
                      ``selfwritten_linebreaks``.
    type='wrap'       wrap each paragraph with ``textwrap.wrap``.
    type='character'  cut the string every *linelength* characters.

    For 'word' and 'wrap', paragraphs are the '\\n'-separated parts of
    *string*; they are re-joined with one linebreak, or two when
    *double_linebreaks* is set. Nothing is appended after the last paragraph.

    Returns the wrapped text, or None for an unknown *type*.
    """
    separator = '\n\n' if double_linebreaks else '\n'

    if type == 'word':
        # Creating the hyphenator is part of the best-effort path: when it
        # fails (unknown language, missing dictionary) every paragraph uses
        # the fallback instead of the whole call crashing.
        try:
            hyphenator = Hyphenator(_HYPHENATOR_LOCALES[language])
        except Exception as e:
            print('Error:', e)
            hyphenator = None
        wrapped = []
        for paragraph in string.split('\n'):
            lines = None
            if hyphenator is not None:
                try:
                    lines = fill(paragraph, width=linelength, use_hyphenator=hyphenator)
                except Exception as e:
                    print('Error:', e)
            if lines is None:
                print('>>> Hyphenator didn\'t work, selfwritten_linebreaks used instead.')
                lines = selfwritten_linebreaks(paragraph, linelength - 3)  # Calibration
            wrapped.append(lines)
        return separator.join(wrapped)

    if type == 'wrap':
        wrapped = [
            '\n'.join(textwrap.wrap(paragraph, width=linelength))
            for paragraph in string.split('\n')
        ]
        return separator.join(wrapped)

    if type == 'character':
        # Slice instead of counting by hand so that no character is lost at
        # a break and the last, shorter line is kept.
        step = max(linelength, 1)
        return '\n'.join(string[i:i + step] for i in range(0, len(string), step))

    return None


def fill_page(string):
    """Pad *string* with linebreaks up to a whole number of 70-line pages.

    Progress information is printed to stdout. Returns the padded text with
    one extra '\\n' appended.
    """
    print('--- fill_page() starts ---')
    total_lines = len(string.split('\n'))
    print(' total_lines :', total_lines)
    total_pages = total_lines // 70
    print(' total_pages :', total_pages)
    full_pages_lines = 70 * total_pages
    print(' full_pages :', full_pages_lines)
    remainder = total_lines - full_pages_lines
    # After padding, the number of lines is always a multiple of 70.
    fill_up_lines = 70 - remainder if remainder else 0
    print(' fill_up_lines :', fill_up_lines)
    page = string + ('\n' * fill_up_lines)
    print(' page(s) length:', len(page.split('\n')))
    print('--- fill_page() ends ---')
    return page + '\n'


def insert_text_block(string, inserted, left, width):
    """Place the lines of *inserted* to the right of the lines of *string*.

    Each left line that has a matching right line is cut or padded with
    spaces to exactly *left* characters before the right line is appended.
    When *string* has more lines than *inserted*, the remaining left lines
    are copied unchanged; when it has fewer, the left column is blank.

    *width* is accepted but not used. Returns the merged lines joined
    with '\\n'.
    """
    left_column_lines = string.split('\n')
    right_column_lines = inserted.split('\n')
    column = max(left, 0)
    merged = []
    for line_number in range(max(len(left_column_lines), len(right_column_lines))):
        if line_number < len(left_column_lines):
            left_line = left_column_lines[line_number]
        else:
            left_line = ''
        if line_number < len(right_column_lines):
            # Fill left_column_line up to the "left" variable
            merged.append(left_line[:column].ljust(column) + right_column_lines[line_number])
        else:
            # There is no right_column_line anymore
            merged.append(left_line)
    return '\n'.join(merged)


def insert_symbol_background(string, linelength, symbols, multiplier):
    """Scatter random symbols over the blank areas of *string*.

    A space whose neighbours on both sides are spaces is replaced by a random
    element of *symbols*, and every non-empty line is followed by a space and
    random symbols up to *linelength*. Before each line,
    ``int(line_number * multiplier)`` extra spaces are added to the pool of
    symbols, which makes symbols rarer further down (gradient effect).

    *symbols* may be a string or a list; it is copied and never modified.
    Returns the new text; every line, including the last, ends with '\\n'.
    """
    pool = list(symbols)
    out = []
    for line_number, line in enumerate(string.split('\n'), start=1):
        # Apply the multiplier, to create a gradient effect :)
        pool.extend(' ' * int(line_number * multiplier))
        last = len(line) - 1
        for c, character in enumerate(line):
            # The last character of a line is always kept as it is.
            # For c == 0, line[c - 1] is the last character of the line.
            if c != last and character == ' ' and line[c - 1] == ' ' and line[c + 1] == ' ':
                character = random.choice(pool)
            out.append(character)
        if line:
            # Fill the line on the right of the text
            out.append(' ')
            out.extend(random.choice(pool) for _ in range(len(line), linelength))
        out.append('\n')
    return ''.join(out)


def char_swap(some_string):
    """Replace typographic and accented characters by plain look-alikes."""
    return some_string.translate(_FIGLET_SWAPS)


def convert_to_figlet_font(string, linelength, font='shadow', alignment='left'):
    """Render *string* as a figlet banner.

    The text is passed through ``char_swap``, wrapped to *linelength* and
    each wrapped line is rendered by running the external ``figlet``
    program. Output lines that contain only whitespace are dropped.

    *alignment* is 'left', 'right' or 'center'; any other value raises
    KeyError. Returns the banner; every line ends with '\\n'.
    """
    # remove French characters in figlet titles (not all fonts include them...)
    string = char_swap(string)
    string = string.replace('(edition vinyle)', '')  # For Javier's titles
    text = insert_linebreaks(string, linelength, type='wrap', double_linebreaks=False)
    alignment_flag = {'left': '-l', 'right': '-r', 'center': '-c'}[alignment]
    banner = []
    for line in text.split('\n'):
        command = ['figlet', line, '-w', str(linelength * 6), '-n', '-f', font, '-p', alignment_flag]
        figlet_string = subprocess.check_output(command).decode() + '\n'
        # Do not include empty linebreaks in the figlet header
        for figlet_line in figlet_string.split('\n'):
            if re.search(r'[^\s]', figlet_line):
                banner.append(figlet_line + '\n')
    return ''.join(banner)


def align(string, linewidth, aligment='center'):
    """Pad *string* with the same number of spaces on both sides.

    The margin is half of the space left in *linewidth*, rounded down.
    *aligment* is accepted but not used.
    """
    margin = ' ' * int((linewidth - len(string)) / 2)
    return margin + string + margin


def check_element(element):
    """Return the plain-text rendering of *element*.

    Only ``element.name`` and ``element.text`` are read: 'hr' becomes a short
    rule, an element without a name becomes an empty string, 'b' is wrapped
    in angle brackets and anything else yields its text.
    """
    if element.name == 'hr':
        return ('-' * 3) + '\n'
    if element.name is None:
        return ''
    if element.name == 'b':
        return '<' + element.text + '>'
    return element.text


def add_headers(section_type, element):
    """Render *element* according to the kind of section it belongs to.

    *section_type* is searched for 'stories'/'récits', 'works' and
    'glossary'; headings (and 'lemmaheader' elements in works sections) get
    a decorated rendering, everything else is rendered by ``check_element``
    followed by a linebreak.
    """
    string = ''
    if 'stories' in section_type or 'récits' in section_type:
        if 'h2' in element.name:
            string += '\n'
            string += '--- ' + element.text + ' ---\n'
            string += '\n'
        elif 'h3' in element.name:
            header = element.text.upper()
            header = header.replace('STORIES ABOUT', 'STORIES\nABOUT')
            header = header.replace('RÉCITS CONTEXTUALISÉS AUTOUR', ' RÉCITS CONTEXTUALISÉS\nAUTOUR')
            for line in header.split('\n'):
                string += align(line, 56) + '\n'
            string += '\n\n'
        elif 'toc' not in (element.get('class') or []):
            # Only the table of contents is left out.
            string += check_element(element) + '\n'
    elif 'works' in section_type:
        linewidth = 11 if language == 'en' else 10
        if 'lemmaheader' in (element.get('class') or []):
            blank_line = ' ' * 55 + '\n'
            header = '\n' + blank_line + blank_line
            header += convert_to_figlet_font(element.text, linewidth, font='ogre', alignment='center')
            header += blank_line
            # One '0' among fifteen spaces: a sparse background.
            string = insert_symbol_background(header, 55, ['0'] + [' '] * 15, 0)
        else:
            string += check_element(element) + '\n'
    elif 'glossary' in section_type:
        if 'h2' in element.name:
            string += _GLOSSARY_BANNER.format(element.text.upper())
            string += '\n'
        else:
            string += check_element(element) + '\n'
    else:
        string += check_element(element) + '\n'
    return string


def apply_zigzag(string, pattern_width):
    """Indent the lines of *string* in a zigzag.

    The indentation grows by one space per line until it has passed
    *pattern_width*, then shrinks by one per line until it has passed zero,
    and so on. Returns the indented text; every line ends with '\\n'.
    """
    indented = []
    count = 0
    fwd = True
    for line in string.split('\n'):
        # A negative count yields no indentation.
        indented.append((' ' * count) + line + '\n')
        if fwd:
            if count <= pattern_width:
                count += 1
            else:
                fwd = False
                count -= 1
        elif count >= 0:
            count -= 1
        else:
            fwd = True
            count += 1
    return ''.join(indented)


# def text_to_pattern(string, template):
#     template = template.split('\n')
#     character_position = 0
#     new = ''
#     for line_number, line in enumerate(template):
#         for character in line:
#             if character == '░':
#                 new += string[character_position]
#                 character_position += 1
#             else:
#                 new += ' '
#         new += '\n'
#     return new


def counting_pattern(string, linelength):
    """Build a pattern of growing lines from the tokens of *string*.

    Before each token is processed, the text accumulated so far is emitted
    as a line; the accumulation restarts once the running count reaches
    *linelength*. Linebreaks inside a token are doubled. The text
    accumulated after the last token is not emitted.
    """
    emitted = []
    current = ''
    count = 1
    for word in tokenizer.tokenize(string):
        emitted.append(current + '\n')
        count += len(word)
        current += word.replace('\n', '\n\n')
        if count >= linelength:
            current = ''
            count = 1
    return ''.join(emitted)


def insert_counters_page():
    """Return a page of 69 rows of 109 characters that count 1 to 9.

    The counter runs on across the rows and shows a space in place of
    every 0. The result is passed through ``fill_page``.
    """
    rows = []
    position = 0
    for _ in range(69):
        row = []
        for _ in range(109):
            digit = position % 10
            row.append(str(digit) if digit else ' ')
            position += 1
        rows.append(''.join(row) + '\n')
    return fill_page(''.join(rows))


def insert_pagenumbers(pages):
    """Write page numbers on every 70th line of *pages*.

    The 70th line of a page is replaced by the page number, indented by two
    spaces per page. Page 1, pages from 56 on and the very last line of the
    text are left untouched. Returns the text; every line ends with '\\n'.
    """
    lines = pages.split('\n')
    numbered = []
    page = 0
    for line_number, line in enumerate(lines, start=1):
        if line_number % 70 == 0:
            page += 1
            if page != 1 and line_number != len(lines) and page < 56:
                line = (' ' * page * 2) + str(page)
        numbered.append(line + '\n')
    return ''.join(numbered)


def sinus_jj():
    """Return 69 lines of 110 characters with sine-spaced digits.

    The digits 0-9 are repeated, each followed by ``int((sin(5 * x) + 1) * 10)``
    spaces; every spacing value is also printed to stdout. The resulting
    stream is cut into rows of 110 characters.
    """
    line_width = 110
    line_height = 70
    pieces = []
    for x in range(line_width * line_height):
        s = int((sin(5 * x) + 1) * 10)
        print(s)
        pieces.append(str(x % 10) + (' ' * s))
    out = ''.join(pieces)
    rows = [out[start:start + line_width] for start in range(0, len(out), line_width)]
    return '\n'.join(rows[:69]) + '\n'