varia.website/plugins/post_stats/post_stats.py

72 lines
2.2 KiB
Python

# -*- coding: utf-8 -*-
"""
Post Statistics
========================
This plugin calculates various statistics about a post and stores them in an article.stats dictionary:
wc: how many words
read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
word_counts: frequency count of all the words in the article; can be used for tag/word clouds
fi: Flesch-Kincaid Index / Reading Ease
fk: Flesch-Kincaid Grade Level
"""
from pelican import signals
from bs4 import BeautifulSoup
import re
from collections import Counter
from .readability import *
def calculate_stats(instance):
    """Compute statistics for a content object and store them on it.

    Reads the HTML held in ``instance._content``. When it is not ``None``,
    ``instance.stats`` is set to a dict with these keys:

    * ``word_counts`` -- ``Counter`` of the lower-cased words in the text
    * ``wc`` -- total number of words
    * ``read_mins`` -- reading time in whole minutes at 250 words per
      minute, rounded up and never less than 1
    * ``fi`` -- result of ``flesch_index``, formatted with two decimals
    * ``fk`` -- result of ``flesch_kincaid_level``, formatted with two
      decimals

    When ``instance._content`` is ``None`` the object is left untouched.
    """
    content = instance._content
    if content is None:
        return

    # How fast do average people read?
    WPM = 250

    # Use BeautifulSoup to get readable/visible text
    raw_text = BeautifulSoup(content, 'html.parser').getText()

    # Turn non-breaking spaces into ordinary spaces, whether they show up
    # as the U+00A0 character or as a literal '&nbsp;' left in the text,
    # so that removing entities below does not glue two words together.
    raw_text = raw_text.replace(u'&nbsp;', u' ').replace(u'\xa0', u' ')

    # Remove any remaining entity references (&name;, &#123;, &#x1F;).
    # The pattern is limited to well-formed references so that a plain
    # ampersand in prose ("Q&A session; then ...") does not swallow
    # everything up to the next semicolon.
    entities = r'&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);'
    raw_text = re.sub(entities, '', raw_text)

    # The Flesch-Kincaid readability stats count sentences,
    # so keep a copy before the punctuation is removed
    text_with_punctuation = raw_text

    # Remove punctuation; the backslash is escaped explicitly so the
    # literal contains no invalid escape sequence
    drop = u'.,?!@#$%^&*()_+-=\\|/[]{}`~:;\'\"‘’—…“”'
    raw_text = raw_text.translate({ord(c): None for c in drop})

    # Count the words in the text
    word_count = Counter(raw_text.lower().split())

    stats = {}
    stats['word_counts'] = word_count
    stats['wc'] = sum(word_count.values())

    # Calculate how long it will take to read, rounding up, with a
    # minimum of one minute
    stats['read_mins'] = max(1, (stats['wc'] + WPM - 1) // WPM)

    # Calculate the Flesch-Kincaid readability stats
    readability_stats = text_stats(text_with_punctuation, stats['wc'])
    stats['fi'] = "{:.2f}".format(flesch_index(readability_stats))
    stats['fk'] = "{:.2f}".format(flesch_kincaid_level(readability_stats))

    instance.stats = stats
def register():
    """Connect ``calculate_stats`` to the ``content_object_init`` signal."""
    content_init_signal = signals.content_object_init
    content_init_signal.connect(calculate_stats)