Files for the publication & poster for Data Workers, an exhibition by Algolit. http://www.algolit.net/index.php/Data_Workers
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

398 lines
11 KiB

#!/usr/bin/env python3
import random, re, subprocess
from hyphen import Hyphenator
import textwrap
from textwrap2 import fill
import nltk
from nltk.tokenize import RegexpTokenizer
# Module-level tokenizer shared by selfwritten_linebreaks() and counting_pattern().
# The pattern has three alternatives: a run of word characters together with one
# arbitrary character on each side, a run of word characters anchored with ^, or a
# run of word characters anchored with $.
tokenizer = RegexpTokenizer(r'[\s\W\w]\w+[\s\W\w\.]|^\w+|\w+$') # initialize tokenizer
# Publication language. insert_linebreaks() uses it to pick the hyphenation
# dictionary ('en' or 'fr') and add_headers() uses it to pick a title width.
# language = 'fr'
language = 'en'
def selfwritten_linebreaks(string, linelength):
    """Re-flow *string* into lines of roughly *linelength* characters.

    Used by insert_linebreaks() as a fallback when the hyphenating ``fill``
    raises. The text is split with the module-level ``tokenizer``; a string
    that contains ``'http'`` is not tokenized and is walked character by
    character instead.

    A piece is always appended in full before the line is closed, so a line
    can end up longer than *linelength*.

    Args:
        string: Text to re-flow.
        linelength: Character budget per line.

    Returns:
        The re-flowed text. Closed lines end in ``'\\n'``; the last line does
        not.

    NOTE(review): the indentation of this file was lost; the loop is assumed
    to run for both tokenized text and 'http' strings. Confirm against the
    original file.
    """
    pieces = string if 'http' in string else tokenizer.tokenize(string)
    last_index = len(pieces) - 1
    finished = []
    current = ''
    used = 1
    for index, piece in enumerate(pieces):
        # The budget is charged with the full piece, before any trimming.
        used += len(piece)
        if current == '':
            # A new line never starts with the piece's leading space.
            if piece[0] == ' ':
                piece = piece[1:]
            if piece == ' ':
                continue
        if index == last_index:
            # The final piece closes the text without a trailing newline.
            current += piece
            finished.append(current)
        elif used < linelength:
            current += piece
        else:
            current += piece
            finished.append(current + '\n')
            current = ''
            used = 1
    return ''.join(finished)
def _join_paragraphs(paragraphs, double_linebreaks):
    """Glue wrapped paragraphs together; nothing is added after the last one."""
    separator = '\n\n' if double_linebreaks else '\n'
    return separator.join(paragraphs)


def insert_linebreaks(string, linelength, type='character', double_linebreaks=False):
    """Wrap *string* to *linelength* using one of three strategies.

    Args:
        string: Text to wrap. In 'word' and 'wrap' mode every ``'\\n'``
            separates two paragraphs that are wrapped independently.
        linelength: Maximum line width in characters.
        type: ``'word'`` wraps with the hyphenating ``fill`` (falling back to
            selfwritten_linebreaks() when it raises), ``'wrap'`` uses
            ``textwrap.wrap``, ``'character'`` cuts the text into pieces of
            exactly *linelength* characters regardless of word boundaries.
        double_linebreaks: Separate paragraphs with a blank line instead of a
            single newline ('word' and 'wrap' mode only).

    Returns:
        The wrapped text without a trailing newline. An unrecognised *type*
        yields an empty string.
    """
    if type == 'word':
        if language == 'en':
            hyphenator = Hyphenator('en_US')
        if language == 'fr':
            hyphenator = Hyphenator('fr_FR')
        wrapped = []
        for paragraph in string.split('\n'):
            try:
                # For any other language `hyphenator` is unbound; the
                # resulting error is caught below and triggers the fallback.
                block = fill(paragraph, width=linelength, use_hyphenator=hyphenator)
            except Exception as e:
                print('Error:', e)
                print('>>> Hyphenator didn\'t work, selfwritten_linebreaks used instead.')
                block = selfwritten_linebreaks(paragraph, linelength-3) # Calibration
            wrapped.append(block)
        return _join_paragraphs(wrapped, double_linebreaks)

    if type == 'wrap':
        wrapped = [
            '\n'.join(textwrap.wrap(paragraph, width=linelength))
            for paragraph in string.split('\n')
        ]
        return _join_paragraphs(wrapped, double_linebreaks)

    if type == 'character':
        # Slice instead of counting by hand: the hand-written counter dropped
        # the character at every line boundary and lost the final partial line.
        step = max(1, linelength)
        return '\n'.join(
            string[start:start + step] for start in range(0, len(string), step)
        )

    return ''
def fill_page(string, lines_per_page=70):
    """Pad *string* with blank lines so it fills a whole number of pages.

    Lines are counted by splitting on ``'\\n'``. When the count is not a
    multiple of *lines_per_page*, newlines are appended until it is. Progress
    figures are printed to stdout.

    Args:
        string: The page content.
        lines_per_page: Number of lines on one page (70 by default).

    Returns:
        The padded content followed by one extra ``'\\n'``.

    Raises:
        ValueError: If *lines_per_page* is smaller than 1.
    """
    if lines_per_page < 1:
        raise ValueError('lines_per_page must be at least 1')
    print('--- fill_page() starts ---')
    total_lines = string.count('\n') + 1
    print(' total_lines :', total_lines)
    total_pages = total_lines // lines_per_page
    print(' total_pages :', total_pages)
    full_pages_lines = lines_per_page * total_pages
    print(' full_pages :', full_pages_lines)
    leftover = total_lines - full_pages_lines
    fill_up_lines = lines_per_page - leftover if leftover else 0
    print(' fill_up_lines :', fill_up_lines)
    # After padding the line count is always a multiple of lines_per_page,
    # so no truncating "safety check" is needed.
    page = string + ('\n' * fill_up_lines)
    print(' page(s) length:', page.count('\n') + 1)
    print('--- fill_page() ends ---')
    return page + '\n'
def insert_text_block(string, inserted, left, width):
    """Place *inserted* as a right-hand column next to *string*.

    Both texts are split on ``'\\n'`` and merged row by row; the result has as
    many rows as the longer of the two.

    Args:
        string: Text for the left column.
        inserted: Text for the right column.
        left: Column at which the right-hand text starts.
        width: Unused; kept so existing callers keep working.

    Returns:
        The merged rows joined with ``'\\n'`` (no trailing newline).
    """
    left_rows = string.split('\n')
    right_rows = inserted.split('\n')
    merged = []
    for row in range(max(len(left_rows), len(right_rows))):
        if row < len(left_rows) and len(left_rows[row]) < left:
            # Short left row: pad it with spaces up to the start column.
            gutter = left_rows[row].ljust(left)
        else:
            # No left row, or one that reaches the start column: the gutter is
            # blank. NOTE(review): this discards the text of a left row that
            # is `left` characters or longer whenever a right row exists --
            # confirm this is intended.
            gutter = ' ' * left
        if row < len(right_rows):
            merged.append(gutter + right_rows[row])
        else:
            # Right column exhausted: the left row is kept as is, unpadded.
            merged.append(left_rows[row])
    return '\n'.join(merged)
def insert_symbol_background(string, linelength, symbols, multiplier):
    """Scatter random symbols into the blank areas of *string*.

    For every line, a space whose two neighbours are also spaces is replaced
    by a random pick from the symbol pool, and the line is then extended with
    one space plus random picks up to *linelength*. Empty lines are left
    empty. Every line, including the last, is terminated with ``'\\n'``.

    Args:
        string: Text to decorate, lines separated by ``'\\n'``.
        linelength: Width to which non-empty lines are filled.
        symbols: Sequence of single-character strings to pick from. It is
            copied, so the caller's object is never modified.
        multiplier: Before line number ``x`` (1-based) is processed,
            ``int(x * multiplier)`` extra spaces are added to the pool, which
            thins the symbols out further down the text.

    Returns:
        The decorated text.
    """
    # Work on a private copy: `symbols += ...` on a list argument would grow
    # the caller's list in place.
    pool = list(symbols)
    out = []
    for line_number, line in enumerate(string.split('\n'), start=1):
        # Apply the multiplier, to create a gradient effect :)
        pool.extend(' ' * int(line_number * multiplier))
        last = len(line) - 1
        for c, character in enumerate(line):
            # The last character is never replaced. For the first character
            # the "previous" neighbour is line[-1], i.e. the end of the line.
            if c != last and line[c - 1] == ' ' and line[c + 1] == ' ':
                character = character.replace(' ', random.choice(pool))
            out.append(character)
        if line:
            # Fill the line on the right of the text
            out.append(' ')
            out.extend(random.choice(pool) for _ in range(len(line), linelength))
        out.append('\n')
    return ''.join(out)
# Characters that not every figlet font contains, mapped to plain ASCII.
# NOTE(review): four search strings were empty/invisible in the reviewed source
# (an empty pattern makes str.replace insert the replacement between every
# character). They are assumed to be the cp1252 "smart punctuation" code points
# \x96, \x93, \x94 and \x92; both those and their proper Unicode forms are
# mapped here, written as escapes so they stay visible. Confirm against the
# original file.
_CHAR_SWAPS = str.maketrans({
    '\x96': '-', '\u2013': '-',
    '\x93': '"', '\u201c': '"',
    '\x94': '"', '\u201d': '"',
    '\x92': "'", '\u2019': "'",
    'ù': 'u',
    'à': 'a',
    'â': 'a',
    'é': 'e',
    'è': 'e',
    'î': 'i',
})


def char_swap(some_string):
    """Return *some_string* with typographic and accented characters made ASCII.

    Dashes and curly quotes become ``-``, ``"`` and ``'``; the accented
    letters ù à â é è î lose their accent. All other characters are returned
    unchanged.
    """
    return some_string.translate(_CHAR_SWAPS)
def convert_to_figlet_font(string, linelength, font='shadow', alignment='left'):
    """Render *string* as ASCII-art lettering with the external ``figlet`` program.

    The text is first cleaned with char_swap(), wrapped to *linelength* with
    insert_linebreaks(type='wrap'), and then every wrapped line is passed to
    ``figlet`` separately. Rows of the figlet output that contain only
    whitespace are dropped.

    Args:
        string: Title text.
        linelength: Wrap width of the plain text; figlet gets six times this
            value as its output width.
        font: Name of the figlet font.
        alignment: ``'left'``, ``'right'`` or ``'center'``.

    Returns:
        The rendered rows, each terminated with ``'\\n'``.

    Raises:
        KeyError: If *alignment* is not one of the three known values.
    """
    flag_for = {
        'left': '-l',
        'right' : '-r',
        'center' : '-c'
    }
    # remove French characters in figlet titles (not all fonts include them...)
    cleaned = char_swap(string)
    # For Javier's titles
    cleaned = cleaned.replace('(edition vinyle)', '')
    wrapped = insert_linebreaks(cleaned, linelength, type='wrap', double_linebreaks=False)
    rendered = ''
    for row in wrapped.split('\n'):
        command = ['figlet', row, '-w', str(linelength * 6), '-n', '-f', font, '-p', flag_for[alignment]]
        art = subprocess.check_output(command).decode() + '\n'
        # Do not include empty linebreaks in the figlet header
        for art_row in art.split('\n'):
            if re.search(r'[^\s]', art_row):
                rendered += art_row + '\n'
    return rendered
def align(string, linewidth, aligment='center'):
    """Centre *string* by padding it with the same number of spaces on both sides.

    The padding on each side is half of the free space, rounded towards zero;
    a string longer than *linewidth* is returned unpadded. The *aligment*
    argument is accepted but not used.
    """
    padding = ' ' * int((linewidth - len(string)) / 2)
    return padding + string + padding
def check_element(element):
    """Return the plain-text rendering of one parsed element.

    Only ``element.name`` and ``element.text`` are read.

    Returns:
        ``'---\\n'`` for an ``hr``, an empty string when the element has no
        name, the text wrapped in ``<`` and ``>`` for a ``b``, and the bare
        text for everything else.
    """
    name = element.name
    if name == 'hr':
        return ('-' * 3) + '\n'
    if name is None:
        return ''
    if name == 'b':
        return '<' + element.text + '>'
    return element.text
def add_headers(section_type, element):
    """Render one element of a section, giving headers a section-specific layout.

    The layout is chosen by substring tests on *section_type* ('stories' /
    'récits', 'works', 'glossary'); elements without special treatment are
    rendered with check_element() followed by a newline.

    *element* is read through ``.name``, ``.text``, ``.get('class')`` and
    ``['class']`` -- presumably a BeautifulSoup tag; confirm against the caller.

    Returns the rendered string.
    """
    string = ''
    # print(' ----> element:', element)
    if 'stories' in section_type or 'récits' in section_type:
        if 'h2' in element.name:
            # Second-level header: framed with dashes, blank line above and below.
            string += '\n'
            string += '--- ' + element.text + ' ---\n'
            # string += '^' * len(element.text)
            string += '\n'
        elif 'h3' in element.name:
            # Third-level header: upper-cased, split over two lines at a fixed
            # phrase, each line centred on a width of 56.
            header = element.text.upper().replace('STORIES ABOUT', 'STORIES\nABOUT').replace('RÉCITS CONTEXTUALISÉS AUTOUR', ' RÉCITS CONTEXTUALISÉS\nAUTOUR').split('\n')
            for line in header:
                string += align(line, 56) + '\n'
            string += '\n\n'
        elif element.get('class'):
            # Elements carrying a class are skipped here.
            # NOTE(review): the file's indentation was lost. The `else` below is
            # assumed to close the h2/h3/class chain (so class-less elements are
            # rendered); it could instead belong to this inner `if`. Confirm.
            if 'toc' in element['class']:
                pass
        else:
            string += check_element(element) + '\n'
    elif 'works' in section_type:
        # Wrap width of the title text handed to figlet, per language.
        if language == 'en':
            linewidth = 11
        else:
            linewidth = 10
        if element.get('class'):
            if 'lemmaheader' in element['class']:
                # Title of a work: figlet lettering between blank 55-column
                # rows, passed through insert_symbol_background(). This
                # replaces (does not append to) `string`.
                tmp_string = '\n'
                tmp_string += ' ' * 55 + '\n'
                tmp_string += ' ' * 55 + '\n'
                tmp_string += convert_to_figlet_font(element.text, linewidth, font='ogre', alignment='center')
                tmp_string += ' ' * 55 + '\n'
                string = insert_symbol_background(tmp_string, 55, ['0', ' ', ' ', ' ',' ', ' ', ' ',' ', ' ', ' ',' ',' ', ' ', ' ', ' ', ' '], 0)
            else:
                string += check_element(element) + '\n'
        else:
            string += check_element(element) + '\n'
    elif 'glossary' in section_type:
        if 'h2' in element.name:
            # Glossary header: upper-cased title inside a block-character motif.
            # NOTE(review): leading whitespace inside this literal may have been
            # lost together with the file's indentation -- check the original.
            string += '''\
░ ░ ░ ░
░ ░ ░ ░
░ ░
░ ░
{}
░ ░ ░
░ ░ ░ ░
'''.format(element.text.upper())
            string += '\n'
        else:
            string += check_element(element) + '\n'
    else:
        string += check_element(element) + '\n'
    return string
def apply_zigzag(string, pattern_width):
    """Indent the lines of *string* in a zigzag that swings right and back left.

    The indent grows by one space per line until it has passed
    *pattern_width* (the widest line is indented ``pattern_width + 1``), then
    shrinks by one per line. At the left edge three lines in a row get no
    indent before the next swing starts.

    Returns the indented lines, each terminated with ``'\\n'``.
    """
    indent = 0
    step = 1
    out = []
    for row in string.split('\n'):
        # The direction flips only once the indent has overshot a bound; the
        # overshooting line is still written with that indent. A negative
        # indent multiplies out to no spaces at all.
        if step == 1 and indent > pattern_width:
            step = -1
        elif step == -1 and indent < 0:
            step = 1
        out.append((' ' * indent) + row + '\n')
        indent += step
    return ''.join(out)
# def text_to_pattern(string, template):
# template = template.split('\n')
# character_position = 0
# new = ''
# for line_number, line in enumerate(template):
# for character in line:
# if character == '░':
# new += string[character_position]
# character_position += 1
# else:
# new += ' '
# new += '\n'
# return new
def counting_pattern(string, linelength):
    """Build a stepped pattern that shows each line growing token by token.

    *string* is split with the module-level ``tokenizer``. Before each token
    is handled, the line accumulated so far is written out followed by
    ``'\\n'``, so the output holds one row per token. Once the running
    character count reaches *linelength* the accumulated line is cleared; the
    completed line itself is therefore never written.

    Args:
        string: Source text.
        linelength: Character budget after which the line is restarted.

    Returns:
        The pattern, one row per token, each terminated with ``'\\n'``.
    """
    snapshots = []
    current = ''
    used = 1
    for word in tokenizer.tokenize(string):
        snapshots.append(current + '\n')
        # The budget counts the token as tokenized, before newlines are doubled.
        used += len(word)
        current += word.replace('\n', '\n\n')
        if used >= linelength:
            current = ''
            used = 1
    return ''.join(snapshots)
def insert_counters_page():
    """Build a page of repeating digit counters and pad it with fill_page().

    69 rows are generated, each from 109 steps of a counter that writes a
    space followed by the digits 1 to 9. The counter is not reset between
    rows, so the pattern carries over from one row to the next.

    NOTE(review): the file's indentation was lost. The three statements after
    `count = 0` are assumed to belong to the `else` branch rather than to the
    `if count == 10` test, which makes every step write exactly one character.
    Under that reading `count` never influences the output. Confirm against
    the original file.
    """
    page = ''
    num = 0
    count = 2
    for line in range(1,70):
        for i in range(1,110):
            if num == 0:
                # Start of a counter: a blank in front of the digits.
                page += ' '
                count += 1
                num += 1
            elif num < 10:
                page += str(num)
                num += 1
            else:
                # Past 9: restart the counter and write its leading blank.
                num = 0
                if count == 10:
                    count = 0
                page += ' '
                num += 1
                count += 1
        page += '\n'
    return fill_page(page)
def insert_pagenumbers(pages, lines_per_page=70, page_limit=56):
    """Overwrite the last line of each page with its page number.

    Every *lines_per_page*-th line closes a page. That line is replaced by the
    page number, indented by two spaces per page, except on page 1, on the
    very last line of the input, and on pages numbered *page_limit* or higher.

    Args:
        pages: All pages as one string, lines separated by ``'\\n'``.
        lines_per_page: Number of lines on one page (70 by default).
        page_limit: First page number that is no longer written (56 by
            default).

    Returns:
        The text with page numbers inserted; every line, including the last,
        is terminated with ``'\\n'``.

    Raises:
        ValueError: If *lines_per_page* is smaller than 1.
    """
    if lines_per_page < 1:
        raise ValueError('lines_per_page must be at least 1')
    lines = pages.split('\n')
    total = len(lines)
    numbered = []
    page = 0
    for line_number, line in enumerate(lines, start=1):
        if line_number % lines_per_page == 0:
            page += 1
            if page != 1 and line_number != total and page < page_limit:
                line = (' ' * page * 2) + str(page)
        numbered.append(line + '\n')
    return ''.join(numbered)