426 lines
12 KiB
Python
426 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
|
||
import random, re, subprocess
|
||
from math import sin
|
||
|
||
from hyphen import Hyphenator
|
||
import textwrap
|
||
from textwrap2 import fill
|
||
|
||
import nltk
|
||
from nltk.tokenize import RegexpTokenizer
|
||
# Shared tokenizer used by the line-breaking helpers in this module.
# The pattern yields, in order of preference: a run of word characters together
# with the single character on either side of it, a bare word at the very start
# of the input, or a bare word at the very end of the input.
tokenizer = RegexpTokenizer(r'[\s\W\w]\w+[\s\W\w\.]|^\w+|\w+$') # initialize tokenizer
|
||
|
||
# Language of the text being laid out. It is read by insert_linebreaks() to
# choose the hyphenation dictionary and by add_headers() to choose the wrap
# width of figlet titles. Only 'en' and 'fr' are tested for in this file.
language = 'fr'
|
||
# language = 'en'
|
||
|
||
def selfwritten_linebreaks(string, linelength):
    """Greedy fallback line breaker.

    Text without 'http' in it is split into tokens by the module-level
    ``tokenizer``; text containing 'http' is left as a plain string and is
    therefore walked one character at a time. Pieces are appended to the
    current line while a running length counter stays below ``linelength``;
    the piece that reaches the limit still lands on that line, which is then
    closed with a newline. The final piece closes the text without a newline.

    NOTE(review): the indentation of this function was lost in the copy under
    review. The nesting used here (loop outside the 'http' test, blank-skip at
    loop level) is the most plausible reading -- confirm against version
    control.

    Args:
        string: the paragraph to break.
        linelength: counter threshold at which a line is closed.

    Returns:
        The text with newlines inserted.
    """
    pieces = string if 'http' in string else tokenizer.tokenize(string)
    final_index = len(pieces) - 1
    finished = ''
    current = ''
    used = 1
    for index, piece in enumerate(pieces):
        used += len(piece)
        # A fresh line never starts with a blank.
        if current == '' and piece[0] == ' ':
            piece = piece[1:]
        if piece == ' ':
            continue
        current += piece
        if index == final_index:
            finished += current
        elif used >= linelength:
            finished += current + '\n'
            current = ''
            used = 1
    return finished
|
||
|
||
def insert_linebreaks(string, linelength, type='character', double_linebreaks=False):
    """Break ``string`` into lines of at most ``linelength`` characters.

    Args:
        string: the text to wrap.
        linelength: maximum width of a line.
        type: wrapping strategy.
            'word'      -- paragraph by paragraph, hyphenating with ``fill``
                           and a ``Hyphenator`` picked from the module-level
                           ``language``. If that raises (or no dictionary is
                           known for the language) the paragraph is wrapped
                           by ``selfwritten_linebreaks`` instead.
            'wrap'      -- paragraph by paragraph with ``textwrap.wrap``.
            'character' -- hard break after every ``linelength`` characters.
                           Newlines already present in ``string`` are counted
                           like any other character.
        double_linebreaks: in the two paragraph modes, separate paragraphs
            with an empty line instead of a single newline. The last
            paragraph is never followed by a separator.

    Returns:
        The wrapped text.

    Raises:
        ValueError: if ``type`` is not one of the three strategies, or if
            ``linelength`` is smaller than 1 in 'character' mode.
    """
    separator = '\n\n' if double_linebreaks else '\n'

    if type == 'word':
        locale = {'en': 'en_US', 'fr': 'fr_FR'}.get(language)
        hyphenator = Hyphenator(locale) if locale else None
        wrapped = []
        for paragraph in string.split('\n'):
            try:
                if hyphenator is None:
                    raise ValueError('no hyphenation dictionary for language %r' % (language,))
                wrapped.append(fill(paragraph, width=linelength, use_hyphenator=hyphenator))
            except Exception as e:
                # Deliberate best effort: any hyphenation failure falls back
                # to the simple greedy breaker rather than aborting the page.
                print('Error:', e)
                print('>>> Hyphenator didn\'t work, selfwritten_linebreaks used instead.')
                wrapped.append(selfwritten_linebreaks(paragraph, linelength - 3))  # Calibration
        return separator.join(wrapped)

    if type == 'wrap':
        return separator.join(
            '\n'.join(textwrap.wrap(paragraph, width=linelength))
            for paragraph in string.split('\n')
        )

    if type == 'character':
        if linelength < 1:
            raise ValueError('linelength must be at least 1, got %r' % (linelength,))
        # Slice instead of counting by hand: no character is dropped at the
        # break position and the trailing partial line is always kept.
        return '\n'.join(
            string[start:start + linelength]
            for start in range(0, len(string), linelength)
        )

    raise ValueError("unknown type %r (expected 'word', 'wrap' or 'character')" % (type,))
|
||
|
||
def fill_page(string):
    """Pad ``string`` with empty lines up to a whole number of 70-line pages.

    Progress figures are printed to stdout along the way. The returned text
    is the padded input followed by one closing newline.

    Args:
        string: the text of one or more pages, lines separated by newlines.

    Returns:
        The padded text, terminated by a newline.
    """
    page_height = 70
    print('--- fill_page() starts ---')
    rows = string.split('\n')
    row_count = len(rows)
    print(' total_lines :', row_count)
    page_count, leftover = divmod(row_count, page_height)
    print(' total_pages :', page_count)
    complete_rows = page_height * page_count
    print(' full_pages :', complete_rows)

    missing = page_height - leftover if leftover else 0
    print(' fill_up_lines :', missing)
    if leftover:
        page = string + '\n' * missing
    else:
        page = '\n'.join(rows[:row_count])

    # Safety check, to see if the string can be divided by 70 lines
    padded_rows = page.split('\n')
    if len(padded_rows) % page_height != 0:
        print('>>> Careful! The modulo is cutting lines from the pages...', leftover)
        page = '\n'.join(padded_rows[:complete_rows])

    print(' page(s) length:', len(page.split('\n')))
    print('--- fill_page() ends ---')
    return page + '\n'
|
||
|
||
def insert_text_block(string, inserted, left, width):
    """Lay ``inserted`` out as a second column to the right of ``string``.

    Row i of the result is row i of ``string`` brought to exactly ``left``
    characters (padded with spaces, or cut when it is longer so the right
    column stays aligned), followed by row i of ``inserted``. Rows of
    ``string`` that have no right-hand partner are emitted unchanged; rows of
    ``inserted`` that have no left-hand partner are preceded by ``left``
    blanks.

    Args:
        string: text of the left column.
        inserted: text of the right column.
        left: column at which the right-hand text starts.
        width: unused; kept so existing callers keep working.

    Returns:
        The merged text, rows joined by newlines, without a trailing newline.
    """
    gutter = max(left, 0)
    left_rows = string.split('\n')
    right_rows = inserted.split('\n')

    merged = []
    for index in range(max(len(left_rows), len(right_rows))):
        left_row = left_rows[index] if index < len(left_rows) else ''
        if index < len(right_rows):
            # Previously a left row of length >= left was replaced by blanks,
            # which also wiped rows that fitted the column exactly.
            merged.append(left_row[:gutter].ljust(gutter) + right_rows[index])
        else:
            merged.append(left_row)
    return '\n'.join(merged)
|
||
|
||
def insert_symbol_background(string, linelength, symbols, multiplier):
    """Scatter random symbols into the blank areas of ``string``.

    Within a line, a space is replaced by a random symbol when the characters
    before and after it are spaces too; the last character of a line is
    always kept. Every non-empty line is then followed by one space and
    random symbols up to column ``linelength``. Empty lines stay empty.

    Before each line, ``int(line_number * multiplier)`` extra spaces are
    added to the pool of symbols (line numbers start at 1), so symbols get
    sparser further down the text -- a gradient effect.

    NOTE(review): for the first character of a line the "previous" character
    is ``line[-1]``, i.e. the last character of that line. This is kept as it
    was; confirm whether it is intended.

    Args:
        string: the text to decorate, lines separated by newlines.
        linelength: column up to which each non-empty line is filled.
        symbols: sequence of one-character strings (or a string) to draw from.
            It is copied; the caller's object is never modified.
        multiplier: strength of the gradient; 0 disables it.

    Returns:
        The decorated text; every line, including the last, ends with a
        newline.
    """
    # Work on a private copy: ``symbols += ...`` on a list argument used to
    # grow the caller's list as a side effect.
    pool = list(symbols)
    pieces = []

    for line_number, line in enumerate(string.split('\n')):
        # Apply the multiplier, to create a gradient effect :)
        pool.extend(' ' * int((line_number + 1) * multiplier))

        last = len(line) - 1
        for c, character in enumerate(line):
            # if previous and next character is a space, add a symbol
            if c != last and line[c - 1] == ' ' and line[c + 1] == ' ':
                character = character.replace(' ', random.choice(pool))
            pieces.append(character)

        # Fill the line on the right of the text
        if line:
            pieces.append(' ')
            pieces.extend(random.choice(pool) for _ in range(len(line), linelength))

        pieces.append('\n')

    return ''.join(pieces)
|
||
|
||
def char_swap(some_string):
    """Return ``some_string`` with typographic and accented characters replaced.

    The dash, the curly quotes and a handful of accented French vowels are
    swapped for their plain ASCII look-alikes, one replacement after the
    other in the order listed.
    """
    replacements = {
        '–': '-',
        '“': '"',
        '”': '"',
        'ù': 'u',
        "’": "'",
        'à': 'a',
        'â': 'a',
        'é': 'e',
        'è': 'e',
        'î': 'i',
    }
    cleaned = some_string
    for fancy, plain in replacements.items():
        cleaned = cleaned.replace(fancy, plain)
    return cleaned
|
||
|
||
def convert_to_figlet_font(string, linelength, font='shadow', alignment='left'):
    """Render ``string`` as a banner with the external ``figlet`` program.

    The text is first passed through ``char_swap``, the marker
    '(edition vinyle)' is removed, and the result is wrapped to
    ``linelength`` with ``insert_linebreaks`` in 'wrap' mode. Each wrapped
    line is rendered by its own ``figlet`` call with an output width of
    ``linelength * 6``; rendered rows that contain only whitespace are
    dropped.

    Args:
        string: the title to render.
        linelength: wrap width of the title, in characters.
        font: name of the figlet font.
        alignment: 'left', 'right' or 'center'.

    Returns:
        The banner; every kept row ends with a newline.

    Raises:
        KeyError: if ``alignment`` is not one of the three known values.
    """
    plain = char_swap(string)  # remove French characters in figlet titles (not all fonts include them...)
    plain = plain.replace('(edition vinyle)', '')  # For Javier's titles
    wrapped = insert_linebreaks(plain, linelength, type='wrap', double_linebreaks=False)

    alignment_flag = {'left': '-l', 'right': '-r', 'center': '-c'}[alignment]
    banner_width = str(linelength * 6)

    banner = ''
    for row in wrapped.split('\n'):
        command = ['figlet', row, '-w', banner_width, '-n', '-f', font, '-p', alignment_flag]
        rendered = subprocess.check_output(command).decode() + '\n'

        # Do not include empty linebreaks in the figlet header
        for rendered_row in rendered.split('\n'):
            if re.search(r'[^\s]', rendered_row):
                banner += rendered_row + '\n'

    return banner
|
||
|
||
def align(string, linewidth, aligment='center'):
    """Centre ``string`` by putting the same margin of spaces on both sides.

    The margin is half the free space, rounded towards zero; a string wider
    than ``linewidth`` is returned without any margin. ``aligment`` is
    accepted but not used.
    """
    free_space = linewidth - len(string)
    padding = ' ' * int(free_space / 2)
    return ''.join((padding, string, padding))
|
||
|
||
def check_element(element):
    """Return the plain-text rendering of one element.

    An element named 'hr' becomes a three-dash rule followed by a newline,
    an element without a name becomes an empty string, an element named 'b'
    has its text wrapped in angle brackets, and anything else is returned as
    its text.
    """
    tag = element.name
    if tag is None:
        return ''
    if tag == 'hr':
        return '-' * 3 + '\n'
    if tag == 'b':
        return ''.join(('<', element.text, '>'))
    return element.text
|
||
|
||
# Build and return the text for one element, formatted according to the
# section it belongs to. `section_type` is tested with `in` for the markers
# 'stories' / 'récits', 'works' and 'glossary'; anything else falls through to
# check_element(). `element` must offer .name, .text, .get('class') and
# ['class'] -- presumably a BeautifulSoup tag; verify against the caller.
# NOTE(review): indentation was lost in this copy of the file. The nesting of
# the `else:` branches (in particular inside the 'stories' section) cannot be
# determined from here -- confirm against version control before restyling.
def add_headers(section_type, element):
|
||
string = ''
|
||
# print(' ----> element:', element)
|
||
|
||
# Stories section: h2 becomes a '--- title ---' line framed by empty lines.
if 'stories' in section_type or 'récits' in section_type:
|
||
if 'h2' in element.name:
|
||
string += '\n'
|
||
string += '--- ' + element.text + ' ---\n'
|
||
# string += '^' * len(element.text)
|
||
string += '\n'
|
||
# h3 is upper-cased, two known long titles are split over two lines, and
# each line is centred with align(line, 56).
elif 'h3' in element.name:
|
||
header = element.text.upper().replace('STORIES ABOUT', 'STORIES\nABOUT').replace('RÉCITS CONTEXTUALISÉS AUTOUR', ' RÉCITS CONTEXTUALISÉS\nAUTOUR').split('\n')
|
||
for line in header:
|
||
string += align(line, 56) + '\n'
|
||
string += '\n\n'
|
||
# Elements whose class list contains 'toc' add nothing.
elif element.get('class'):
|
||
if 'toc' in element['class']:
|
||
pass
|
||
else:
|
||
string += check_element(element) + '\n'
|
||
|
||
# Works section: the figlet wrap width depends on the module-level language.
elif 'works' in section_type:
|
||
if language == 'en':
|
||
linewidth = 11
|
||
else:
|
||
linewidth = 10
|
||
|
||
if element.get('class'):
|
||
# A 'lemmaheader' is rendered as a centred figlet banner in the 'ogre' font,
# framed by lines of 55 spaces, then passed through
# insert_symbol_background() with a pool of one '0' and fifteen spaces and a
# multiplier of 0.
if 'lemmaheader' in element['class']:
|
||
tmp_string = '\n'
|
||
tmp_string += ' ' * 55 + '\n'
|
||
tmp_string += ' ' * 55 + '\n'
|
||
tmp_string += convert_to_figlet_font(element.text, linewidth, font='ogre', alignment='center')
|
||
tmp_string += ' ' * 55 + '\n'
|
||
string = insert_symbol_background(tmp_string, 55, ['0', ' ', ' ', ' ',' ', ' ', ' ',' ', ' ', ' ',' ',' ', ' ', ' ', ' ', ' '], 0)
|
||
else:
|
||
string += check_element(element) + '\n'
|
||
else:
|
||
string += check_element(element) + '\n'
|
||
|
||
# Glossary section: an h2 title is upper-cased and placed in the `{}` slot of
# a block-character template.
# NOTE(review): the whitespace inside the template below appears collapsed in
# this copy of the file -- restore the original spacing from version control
# before relying on its layout.
elif 'glossary' in section_type:
|
||
if 'h2' in element.name:
|
||
string += '''\
|
||
░
|
||
░
|
||
░ ░ ░ ░
|
||
░ ░ ░ ░
|
||
░ ░
|
||
░ ░
|
||
░ {} ░
|
||
░
|
||
░ ░ ░
|
||
░ ░ ░ ░
|
||
░
|
||
░
|
||
░
|
||
'''.format(element.text.upper())
|
||
string += '\n'
|
||
else:
|
||
string += check_element(element) + '\n'
|
||
# Any other section: plain rendering of the element.
else:
|
||
string += check_element(element) + '\n'
|
||
|
||
return string
|
||
|
||
|
||
def apply_zigzag(string, pattern_width):
    """Indent the lines of ``string`` so that the left margin zigzags.

    The indent grows by one space per line until it has passed
    ``pattern_width``, then shrinks by one per line until it has dropped
    below zero (that line gets no indent), then grows again, and so on.

    Args:
        string: the text to shift, lines separated by newlines.
        pattern_width: indent after which the margin turns back.

    Returns:
        The shifted text; every line, including the last, ends with a
        newline.
    """
    indent = 0
    step = 1
    shifted = []
    for row in string.split('\n'):
        # A negative indent yields an empty margin.
        shifted.append(' ' * indent + row + '\n')
        # Turn around once the margin has overshot either end.
        if step > 0 and indent > pattern_width:
            step = -1
        elif step < 0 and indent < 0:
            step = 1
        indent += step
    return ''.join(shifted)
|
||
|
||
# def text_to_pattern(string, template):
|
||
# template = template.split('\n')
|
||
# character_position = 0
|
||
# new = ''
|
||
# for line_number, line in enumerate(template):
|
||
# for character in line:
|
||
# if character == '░':
|
||
# new += string[character_position]
|
||
# character_position += 1
|
||
# else:
|
||
# new += ' '
|
||
# new += '\n'
|
||
# return new
|
||
|
||
def counting_pattern(string, linelength):
    """Build a pattern in which every row repeats the line written so far.

    ``string`` is split by the module-level ``tokenizer``. Before each token
    is handled, the line accumulated so far is written out as a row; the
    token (with its newlines doubled) is then added to that line. Once a
    running length counter reaches ``linelength`` the line starts over.
    The counter uses the token's length before its newlines are doubled.

    Args:
        string: the text to turn into a pattern.
        linelength: counter threshold at which the accumulated line restarts.

    Returns:
        The rows, each terminated by a newline.
    """
    rows = []
    running = ''
    used = 1
    for token in tokenizer.tokenize(string):
        rows.append(running + '\n')
        used += len(token)
        running += token.replace('\n', '\n\n')
        if used >= linelength:
            running = ''
            used = 1
    return ''.join(rows)
|
||
|
||
# Build a page of repeating digit counters and return it padded by fill_page().
# The outer loop runs 69 times (range(1,70)), the inner one 109 times
# (range(1,110)). `num` is carried across both loops: 0 writes a blank,
# 1..9 write the digit, and on reaching 10 it is set back to 0. `count` starts at 2
# and triggers an extra blank whenever it equals 10.
# NOTE(review): indentation was lost in this copy of the file, so it cannot be
# determined here whether the `if count == 10:` block and the `count += 1`
# after it belong to the inner loop, to its `else:` branch, or to the outer
# loop -- confirm against version control before restyling.
def insert_counters_page():
|
||
page = ''
|
||
num = 0
|
||
count = 2
|
||
for line in range(1,70):
|
||
for i in range(1,110):
|
||
if num == 0:
|
||
page += ' '
|
||
count += 1
|
||
num += 1
|
||
elif num < 10:
|
||
page += str(num)
|
||
num += 1
|
||
else:
|
||
num = 0
|
||
if count == 10:
|
||
count = 0
|
||
page += ' '
|
||
num += 1
|
||
count += 1
|
||
page += '\n'
|
||
return fill_page(page)
|
||
|
||
def insert_pagenumbers(pages):
    """Stamp page numbers onto the last line of each 70-line page.

    Every 70th line closes a page. From page 2 up to page 55 that line is
    replaced by the page number, indented by two spaces per page -- unless it
    is the very last line of the input. All other lines are kept.

    Args:
        pages: the text of all pages, lines separated by newlines.

    Returns:
        The text with page numbers; every line, including the last, ends with
        a newline.
    """
    rows = pages.split('\n')
    row_total = len(rows)
    page = 0
    stamped = []
    for line_number, row in enumerate(rows, start=1):
        if line_number % 70 == 0:
            page += 1
            skip = page == 1 or line_number == row_total or page >= 56
            if not skip:
                row = ' ' * page * 2 + str(page)
        stamped.append(row + '\n')
    return ''.join(stamped)
|
||
|
||
def sinus_jj():
    """Return a page of digits spaced along a sine wave.

    A stream is built from ``line_width * line_height`` samples: sample ``x``
    contributes the digit ``x % 10`` followed by
    ``int((sin(5 * x) + 1) * 10)`` spaces. The stream is cut into rows of
    ``line_width`` characters and the first ``line_height - 1`` rows form the
    page.

    Returns:
        The page; every row ends with a newline.
    """
    line_width = 110
    line_height = 70

    # The per-sample debug print of the original was dropped: it wrote 7700
    # lines to stdout on every call.
    out = ''.join(
        str(x % 10) + ' ' * int((sin(5 * x) + 1) * 10)
        for x in range(line_width * line_height)
    )

    rows = [out[start:start + line_width] for start in range(0, len(out), line_width)]
    return '\n'.join(rows[:line_height - 1]) + '\n'