page_metadata/page_metadata.py


								#!/usr/bin/env python


								# -*- coding: utf-8 -*- #


								# Page Meta-Data

								# ------------------------

								# Insert meta-data about the generated file into the resulting HTML.

								# Copyright (C) 2019  Roel Roscam Abbing

								#

								# Support your local Low-Tech Magazine:

								# https://solar.lowtechmagazine.com/donate.html


								# This program is free software: you can redistribute it and/or modify

								# it under the terms of the GNU General Public License as published by

								# the Free Software Foundation, either version 3 of the License, or

								# (at your option) any later version.


								# This program is distributed in the hope that it will be useful,

								# but WITHOUT ANY WARRANTY; without even the implied warranty of

								# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								# GNU General Public License for more details.


								# You should have received a copy of the GNU General Public License

								# along with this program.  If not, see <https://www.gnu.org/licenses/>.


								from __future__ import unicode_literals

								from pelican import signals

								from bs4 import BeautifulSoup

								import os


								def get_printable_size(byte_size):
								    """
								    Format a byte count as a short human-readable string, e.g. "512B" or "1.500KB".

								    Thanks Pobux!
								    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8

								    The value is divided by 1024 until it drops below 1024 or the largest
								    known unit ("PB") is reached; anything larger is still expressed in "PB".

								    :param byte_size: size in bytes (int or float).
								    :return: the number immediately followed by its unit. A zero size gives
								        "0B", plain bytes are rendered with str(), and scaled values are
								        rendered with three decimals.
								    """
								    BASE_SIZE = 1024.00
								    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]

								    def _fix_size(size, size_index):
								        # Render the numeric part: "0" for empty sizes, the raw value for
								        # bytes, three decimals once the value has been scaled.
								        if not size:
								            return "0"
								        elif size_index == 0:
								            return str(size)
								        else:
								            return "{:.3f}".format(size)

								    current_size = byte_size
								    size_index = 0
								    last_index = len(MEASURE) - 1

								    # Stop on the last unit: stepping past it would make MEASURE[size_index]
								    # raise IndexError for sizes of 1024 PB and above.
								    while current_size >= BASE_SIZE and size_index < last_index:
								        current_size = current_size / BASE_SIZE
								        size_index = size_index + 1

								    size = _fix_size(current_size, size_index)
								    measure = MEASURE[size_index]
								    return size + measure


								def get_media(html_file):
								    """
								    Return the distinct media sources referenced by an HTML file.

								    Only <img> and <object> tags are inspected. The source is taken from
								    the tag's ``src`` attribute, falling back to ``data`` (the attribute
								    <object> normally uses). Tags with neither attribute are skipped
								    instead of raising KeyError.

								    :param html_file: path of the HTML file to read.
								    :return: list of unique source strings, in no particular order.
								    """
								    # Use a context manager so the file handle is closed right away.
								    with open(html_file) as handle:
								        soup = BeautifulSoup(handle.read(), 'html.parser')

								    # A set, because duplicate media don't increase page size.
								    media = set()
								    for tag in soup(['img', 'object']):
								        source = tag.get('src') or tag.get('data')
								        if source:
								            media.add(source)
								    return list(media)


								def generate_metadata(path, context):
								    """
								    Append a one-line summary (file name, build time, page weight) to a written page.

								    The page weight is the size of the output file plus the size of every
								    media file it references that can be found under the output directory.

								    :param path: path of the generated HTML file; it is parsed for media sources.
								    :param context: mapping providing 'OUTPUT_PATH', 'output_file', 'SITEURL'
								        and 'NOW'. 'NOW' is concatenated as-is, so it is presumably a
								        string -- verify against the site settings.
								    """
								    output_path = context['OUTPUT_PATH']
								    output_file = context['output_file']
								    site_prefix = context['SITEURL'] + '/'

								    media_size = 0
								    # enumerate all media displayed on the page
								    for m in get_media(path):
								        # Drop the SITEURL prefix *before* joining. Doing a blanket
								        # str.replace afterwards removed every '/' from the path whenever
								        # SITEURL was empty.
								        if m.startswith(site_prefix):
								            m = m[len(site_prefix):]
								        # A leading '/' would make os.path.join discard output_path.
								        m = os.path.join(output_path, m.lstrip('/'))

								        # isfile (not exists): an empty source resolves to a directory.
								        if os.path.isfile(m):
								            media_size = media_size + os.path.getsize(m)

								    current_file = os.path.join(output_path, output_file)
								    file_size = os.path.getsize(current_file) + media_size

								    def _metadata_line(total_size):
								        # Build the text that gets appended to the page.
								        return output_file + ' ' + context['NOW'] + ' ' + get_printable_size(total_size)

								    # The appended line itself adds to the page weight, so it is built
								    # twice: once to learn its length, then again with that length
								    # included (an approximation: characters are counted, not bytes).
								    metadata = _metadata_line(file_size)
								    metadata = _metadata_line(file_size + len(metadata))

								    with open(current_file, 'a') as f:
								        f.write(metadata)

								    # TODO: add a way to nicely insert the meta-data into an element with id


								def register():
								    """
								    Connect generate_metadata to pelican's content_written signal.
								    """
								    content_written = signals.content_written
								    content_written.connect(generate_metadata)