initial commit

2019-11-12 16:23:09 +01:00 · 2019-11-12 16:23:09 +01:00 · 83d57027b1
commit 83d57027b1
3 changed files with 109 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,9 @@
 # Page Meta-Data
 A plugin to add meta-data for each generated page in the format:
 `index.html Tue 12 Nov 2019 03:52:15 PM CET 57.637KB`
 It calculates the weight of the HTML page including all image media. A time stamp reflecting the date of generation is also added.
 The data is appended at the end of the HTML, so nothing fancy for now.
--- a/init.py
+++ b/init.py
@ -0,0 +1 @@
 from .page_metadata import *
--- a/page_metadata.py
+++ b/page_metadata.py
@ -0,0 +1,99 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*- #
 # Page Meta-Data
 # ------------------------
 # Insert meta-data about the generated file into the resulting HTML.
 # Copyright (C) 2019  Roel Roscam Abbing
 #
 # Support your local Low-Tech Magazine:
 # https://solar.lowtechmagazine.com/donate.html
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 from __future__ import unicode_literals
 from pelican import signals
 from bs4 import BeautifulSoup
 import os
 def get_printable_size(byte_size):
    """
    Format a byte count as a human-readable string such as "57.637KB".

    Thanks Pobux!
    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (PB) is reached. Plain byte counts are printed unchanged,
    scaled values with three decimals, and a zero size as "0".

    :param byte_size: size in bytes.
    :return: the number immediately followed by its unit, e.g. "0B",
        "1023B" or "1.000KB".
    """
    BASE_SIZE = 1024.00
    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]
    def _fix_size(size, size_index):
        # Render the numeric part: "0" for empty sizes, the raw value for
        # bytes, three decimals once the value has been scaled.
        if not size:
            return "0"
        elif size_index == 0:
            return str(size)
        else:
            return "{:.3f}".format(size)
    current_size = byte_size
    size_index = 0
    # Never step past the last unit: anything of 1024 PB or more stays
    # expressed in PB instead of indexing beyond the end of MEASURE.
    last_index = len(MEASURE) - 1
    while current_size >= BASE_SIZE and size_index < last_index:
        current_size = current_size / BASE_SIZE
        size_index = size_index + 1
    size = _fix_size(current_size, size_index)
    measure = MEASURE[size_index]
    return size + measure
 def get_media(html_file):
    """
    Collect the media URLs referenced by an HTML file.

    Currently only images because I, for one, am lazy.
    The file is parsed with BeautifulSoup and every <img> and <object>
    element is inspected for the URL it points at.

    :param html_file: path of the HTML file to scan.
    :return: list of the distinct URLs found, in no particular order.
    """
    # NOTE(review): the file is read with the platform default encoding;
    # confirm this matches the encoding the page was written with.
    with open(html_file) as handle:  # close the handle instead of leaking it
        soup = BeautifulSoup(handle.read(), 'html.parser')
    media = set()  # duplicate media don't increase page size
    for tag in soup.find_all(['img', 'object']):
        # <img> carries its URL in `src`, <object> in `data`. Tags with
        # neither attribute are skipped instead of raising KeyError.
        source = tag.get('src') or tag.get('data')
        if source:
            media.add(source)
    return list(media)
 def _media_path_on_disk(src, output_path, siteurl):
    """
    Map a media URL found in a page to a path below the output directory.
    """
    prefix = siteurl + '/'
    # Strip SITEURL only when it really is the start of the URL. A blanket
    # str.replace() would delete every '/' when SITEURL is empty.
    if src.startswith(prefix):
        src = src[len(prefix):]
    # A leading '/' would make os.path.join() throw output_path away.
    return os.path.join(output_path, src.lstrip('/'))

 def generate_metadata(path, context):
    """
    Append "<output file> <NOW> <page weight>" to a generated page.

    The page weight is the size of the written file plus the size of every
    media file it references that can be found below OUTPUT_PATH; media
    that cannot be found there is silently ignored.

    :param path: path of the file that was just written; it is scanned for
        media references.
    :param context: mapping providing 'OUTPUT_PATH', 'output_file',
        'SITEURL' and 'NOW' (presumably the generation time stamp set in
        the site settings; verify against the configuration).
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']
    siteurl = context['SITEURL']
    media_size = 0
    # enumerate all media displayed on the page
    for src in get_media(path):
        candidate = _media_path_on_disk(src, output_path, siteurl)
        # isfile() rather than exists(): a URL that resolves to a directory
        # must not add the directory entry's size to the page weight.
        if os.path.isfile(candidate):
            media_size = media_size + os.path.getsize(candidate)
    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file) + media_size
    # format() instead of '+' so a non-string NOW does not raise TypeError.
    template = '{} {} {}'
    draft = template.format(
        output_file, context['NOW'], get_printable_size(file_size))
    # The appended line itself adds to the page weight, so it is rendered
    # once to learn its length and then again with that length included.
    metadata = template.format(
        output_file, context['NOW'],
        get_printable_size(file_size + len(draft)))  # cursed code is cursed
    with open(current_file, 'a') as f:
        f.write(metadata)
    # TODO: add a way to nicely insert the meta-data into an element with id
 def register():
    """
    Subscribe generate_metadata to Pelican's content_written signal.
    """
    content_written = signals.content_written
    content_written.connect(generate_metadata)