Browse Source

Update the TF-IDF cross-reading prototype to align it with the workshop version for the Tech Zine Fair in Paris

master
manetta 6 years ago
parent
commit
bfb2ac166d
  1. 2
      cross-reader.tfidf/readings.py
  2. 3
      cross-reader.tfidf/start.py
  3. BIN
      cross-reader.tfidf/static/css/pacifico/Pacifico.ttf
  4. 44
      cross-reader.tfidf/static/css/pacifico/SIL Open Font License.txt
  5. 68
      cross-reader.tfidf/static/css/stylesheet.css
  6. 10
      cross-reader.tfidf/templates/base.html
  7. 2
      cross-reader.tfidf/templates/document.html
  8. 2
      cross-reader.tfidf/templates/index.html
  9. 16
      cross-reader.tfidf/templates/mappings.html
  10. 2
      cross-reader.tfidf/templates/mappings.name.html
  11. 5
      cross-reader.tfidf/templates/results.html
  12. 2
      cross-reader.tfidf/tfidf.py

2
cross-reader.tfidf/readings.py

@ -187,7 +187,7 @@ def request_mappings(mapping_type):
sentences = [] sentences = []
for sentence in index[document]['sentences']: for sentence in index[document]['sentences']:
for word in tokenizer.tokenize(sentence): for word in tokenizer.tokenize(sentence):
if mapping_type == 'tfidf': if mapping_type == 'tfidf' or mapping_type == 'tfidf-mapping':
tfidf = index[document]['tfidf'][word.lower()] * multiplier # lowercased! (!important) tfidf = index[document]['tfidf'][word.lower()] * multiplier # lowercased! (!important)
if [tfidf, word.lower()] not in mappings: # lowercased! (!important) if [tfidf, word.lower()] not in mappings: # lowercased! (!important)
mappings.append([tfidf, word.lower()]) # lowercased! (!important) mappings.append([tfidf, word.lower()]) # lowercased! (!important)

3
cross-reader.tfidf/start.py

@ -71,8 +71,7 @@ def render_list_for_document(list_type, filename):
def get_contrast_mappings(mapping_type): def get_contrast_mappings(mapping_type):
""" """
Displays the page accessible at '/mappings'. Displays the page accessible at '/mappings'.
A TF-IDF visualisation is displayed, A listed overview of all values in the dataset is displayed.
using the TF-IDF values as font-size.
""" """
mappings, filenames = readings.request_mappings(mapping_type) mappings, filenames = readings.request_mappings(mapping_type)
suggestions = open('words.txt', 'r').readlines() suggestions = open('words.txt', 'r').readlines()

BIN
cross-reader.tfidf/static/css/pacifico/Pacifico.ttf

Binary file not shown.

44
cross-reader.tfidf/static/css/pacifico/SIL Open Font License.txt

@ -0,0 +1,44 @@
Copyright (c) 2011, Vernon Adams (vern@newtypography.co.uk),
with Reserved Font Name Pacifico.
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL
-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others.
The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the copyright statement(s).
"Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment.
"Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission.
5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.

68
cross-reader.tfidf/static/css/stylesheet.css

@ -1,12 +1,19 @@
@font-face{
font-family: 'header';
src: url('./pacifico/Pacifico.ttf');
}
body{ body{
margin:20px; margin:20px;
font-size: 16px; font-size: 16px;
line-height: 22px; line-height: 1.4;
overflow-x: hidden; overflow-x: hidden;
} }
h1, h2, h3{ h1, h2, h3{
font-size: 100%; font-size: 100%;
margin:2em 0 1em 0; margin:2.1em 0 0 0;
}
h2{
font-weight: normal;
} }
hr{ hr{
border:0; border:0;
@ -20,23 +27,28 @@ a, a:active, a:hover{
border-bottom:1px dotted; border-bottom:1px dotted;
padding:0; padding:0;
margin:0; margin:0;
z-index: 1;
} }
#nav-wrapper{ #nav-wrapper, #nav-wrapper a{
z-index: -1; z-index: -1;
color:magenta;
} }
#nav{ #nav{
display: inline-block; display: inline-block;
position: absolute;
top:12px;
} }
#logo{ #logo{
margin:0 30px 30px 0; margin:0 30px 30px 0;
font-family: monospace,; font-family: 'header', monospace;
font-size: 16px; font-size: 500%;
line-height: 1.1; line-height: 1.1;
font-weight: normal; font-weight: normal;
} }
#logo a{ #logo a{
border:0; border:0;
color:magenta;
} }
#search{ #search{
width: 400px; width: 400px;
@ -48,7 +60,7 @@ a, a:active, a:hover{
width: 100%; width: 100%;
height: 30px; height: 30px;
padding:0px 10px; padding:0px 10px;
border:1px solid rgba(190,190,190,1); border:1px solid magenta;
} }
#search #submit{ #search #submit{
position: absolute; position: absolute;
@ -60,6 +72,7 @@ a, a:active, a:hover{
border-radius: 100%; border-radius: 100%;
background-color:transparent; background-color:transparent;
text-align: center; text-align: center;
color:magenta;
} }
#search #submit:hover{ #search #submit:hover{
cursor: pointer; cursor: pointer;
@ -67,7 +80,7 @@ a, a:active, a:hover{
#txt-list{ #txt-list{
position: absolute; position: absolute;
width:250px; width:250px;
right: 0; right: 10px;
top:0; top:0;
z-index: 3; z-index: 3;
} }
@ -87,8 +100,8 @@ a, a:active, a:hover{
} }
#suggestions{ #suggestions{
position: absolute; position: absolute;
width: 250px; width: 150px;
right: 300px; right: 320px;
top:0; top:0;
z-index: 2; z-index: 2;
} }
@ -102,21 +115,50 @@ a, a:active, a:hover{
#results{ #results{
max-width: 800px; max-width: 800px;
} }
#results h2{
}
#results .line{ #results .line{
margin:0.5em 0 1em; margin:0 0 1em;
font-size: 125%;
} }
#results strong.query{ #results strong.query{
width: 100%; width: 100%;
height: 12em; height: 12em;
padding: 2em 6em 2em 6em; padding: 2em 6em 2em 6em;
margin:-9.7em -6em -10.5em -6em; margin:-9.7em -6em -10.5em -6em;
z-index: -3;
} }
#mappings{ #mappings{
line-height: 1.5; line-height: 1;
}
#list{
line-height: 1.4;
} }
#analytics{ #analytics{
margin:2em 0; margin:2em 0;
} }
@media print{
#nav-wrapper{
display: none;
}
#wrapper{
width: 90%;
}
a{
border:0;
}
#results h2{
margin:2.1em 0 0;
}
#results .line{
margin:0 0 0.7em;
}
#results strong.query{
margin:0 0.2em 0 0;
padding:0.15em 0.25em 0.3em 0.5em;
height: auto;
border:1px dotted magenta;
border-radius: 25px 15px;
}
}

10
cross-reader.tfidf/templates/base.html

@ -7,16 +7,18 @@
</head> </head>
<body> <body>
<div id="nav-wrapper"> <div id="nav-wrapper">
<div id="nav"> <div id="nav">
<a class="contrast" href="/mappings/tfidf-mapping"></a>
<!-- <small><a class="contrast" href="/mappings/tf">TF</a></small> --> <!-- <small><a class="contrast" href="/mappings/tf">TF</a></small> -->
<small><a class="contrast" href="/mappings/idf">IDF</a></small> <small><a class="contrast" href="/mappings/idf">IDF</a></small>
<small><a class="contrast" href="/mappings/tfidf">TFIDF</a></small> <small><a class="contrast" href="/mappings/tfidf">TFIDF</a></small>
</div><br><br> </div><br><br>
<div id="logo"> <div id="logo">
<a href="/"> <a href="/">
%█▀▀ █▀▀█ █▀▀█ █▀▀ █▀▀ ░░ █▀▀█ █▀▀ █▀▀█ █▀▀▄ ░▀░ █▀▀▄ █▀▀▀ █▀▀ <br> Cross-Readings
%█░░ █▄▄▀ █░░█ ▀▀█ ▀▀█ ▀▀ █▄▄▀ █▀▀ █▄▄█ █░░█ ▀█▀ █░░█ █░▀█ ▀▀█ <br> <!-- %█▀▀ █▀▀█ █▀▀█ █▀▀ █▀▀ ░░ █▀▀█ █▀▀ █▀▀█ █▀▀▄ ░▀░ █▀▀▄ █▀▀▀ █▀▀ <br> -->
%▀▀▀ ▀░▀▀ ▀▀▀▀ ▀▀▀ ▀▀▀ ░░ ▀░▀▀ ▀▀▀ ▀░░▀ ▀▀▀░ ▀▀▀ ▀░░▀ ▀▀▀▀ ▀▀▀ <br> <!-- %█░░ █▄▄▀ █░░█ ▀▀█ ▀▀█ ▀▀ █▄▄▀ █▀▀ █▄▄█ █░░█ ▀█▀ █░░█ █░▀█ ▀▀█ <br> -->
<!-- %▀▀▀ ▀░▀▀ ▀▀▀▀ ▀▀▀ ▀▀▀ ░░ ▀░▀▀ ▀▀▀ ▀░░▀ ▀▀▀░ ▀▀▀ ▀░░▀ ▀▀▀▀ ▀▀▀ <br> -->
<!-- https://fsymbols.com/generators/tarty/ --> <!-- https://fsymbols.com/generators/tarty/ -->
</a> </a>
</div> </div>

2
cross-reader.tfidf/templates/document.html

@ -7,7 +7,7 @@
{% block content %} {% block content %}
<div id="document"> <div id="document">
{% for line in txt%} {% for line in txt%}
<p>{{ line }}</p> <p>{{ line.replace('•', '') }}</p>
{% endfor %} {% endfor %}
</div> </div>
{% endblock %} {% endblock %}

2
cross-reader.tfidf/templates/index.html

@ -12,7 +12,7 @@
{% block results %} {% block results %}
<div id="intro"> <div id="intro">
<p>This Sear%r%rch tool only works with <strong>one word</strong>.</p> <p>This cross-reading tool only works with <strong>one word</strong>.</p>
</div> </div>
{% endblock %} {% endblock %}

16
cross-reader.tfidf/templates/mappings.html

@ -5,17 +5,25 @@
{% endblock %} {% endblock %}
{% block results %} {% block results %}
<div id="mappings"> {% if mapping_type == 'tfidf-mapping' %}
{% if mapping_type == 'tfidf' %} <div id="mappings">
{% for value, word in mappings %} {% for value, word in mappings %}
<strong class="query" style="font-size:{{ 100 + value }}%;"> <a href="/?q={{ word }}">{{ word }}</a> </strong> <strong class="query" style="font-size:{{ 100 + value }}%;"> <a href="/?q={{ word }}">{{ word }}</a> </strong>
{% endfor %} {% endfor %}
{% else %} </div>
{% elif mapping_type == 'idf' %}
<div id="list">
{% for value, word in mappings %} {% for value, word in mappings %}
<strong class="query"> <a href="/?q={{ word }}">{{ word }}</a></strong> ({{ value }})<br> <strong class="query"> <a href="/?q={{ word }}">{{ word }}</a></strong> ({{ value }})<br>
{% endfor %} {% endfor %}
</div>
{% else %}
<div id="list">
{% for value, word in mappings %}
<strong class="query" > <a href="/?q={{ word }}">{{ word }}</a></strong> ({{ value }})<br>
{% endfor %}
</div>
{% endif %} {% endif %}
</div>
{% endblock %} {% endblock %}
{% block suggestions %} {% block suggestions %}

2
cross-reader.tfidf/templates/mappings.name.html

@ -5,6 +5,7 @@
{% endblock %} {% endblock %}
{% block content %} {% block content %}
<div id="mappings">
<h1>{{ document | prettyfilename }}</h1> <h1>{{ document | prettyfilename }}</h1>
{% for sentence in mappings %} {% for sentence in mappings %}
<p class="sentence"> <p class="sentence">
@ -13,6 +14,7 @@
{% endfor %} {% endfor %}
</p> </p>
{% endfor %} {% endfor %}
</div>
{% endblock %} {% endblock %}
{% block suggestions %} {% block suggestions %}

5
cross-reader.tfidf/templates/results.html

@ -14,11 +14,6 @@
{% for _, document in results.items() %} {% for _, document in results.items() %}
{% for line in document.html %} {% for line in document.html %}
<div class="result"> <div class="result">
<!-- <div class="ascii">
✄█▀▀ █▀▀█ █░░ █░░ <br>
✄█░░ █▄▄█ █░░ █░░ <br>
✄▀▀▀ ▀░░▀ ▀▀▀ ▀▀▀ <br>
</div> -->
{% set name = document.filename.replace('.txt', '') %} {% set name = document.filename.replace('.txt', '') %}
<h2><a href="/document/{{ name }}">{{ document.name }}</a></h2> <h2><a href="/document/{{ name }}">{{ document.name }}</a></h2>
<div class="line">{{ line }}</div> <div class="line">{{ line }}</div>

2
cross-reader.tfidf/tfidf.py

@ -34,7 +34,7 @@ def tfidf(query, words, corpus):
tfidf_value = tf * idf tfidf_value = tf * idf
# print('TF-IDF:', tfidf_value) # print('TF-IDF:', tfidf_value)
return tf_count, idf_count, tfidf_value return tf, idf_count, tfidf_value
def get_language(document): def get_language(document):
match = re.search(r'\[.*\]', document, flags=re.IGNORECASE) match = re.search(r'\[.*\]', document, flags=re.IGNORECASE)

Loading…
Cancel
Save