initial commit

5 years ago · 83d57027b1
3 changed files with 109 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,9 @@
+# Page Meta-Data
+
+A plugin to add meta-data for each generated page in the format:
+
+`index.html Tue 12 Nov 2019 03:52:15 PM CET 57.637KB`
+
+It calculates the weight of the HTML page including all image media. A time stamp reflecting the date of generation is also added.
+
+The data is appended at the end of the HTML, so nothing fancy for now.
--- a/__init__.py
+++ b/__init__.py
@ -0,0 +1 @@
+from .page_metadata import *
--- a/page_metadata.py
+++ b/page_metadata.py
@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+# -*- coding: utf-8 -*- #
+
+# Page Meta-Data
+# ------------------------
+# Insert meta-data about the generated file into the resulting HTML.
+# Copyright (C) 2019  Roel Roscam Abbing
+#
+# Support your local Low-Tech Magazine:
+# https://solar.lowtechmagazine.com/donate.html
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+from pelican import signals
+from bs4 import BeautifulSoup
+import os
+
+
+def get_printable_size(byte_size):
+    """
+    Format a byte count as a human-readable string such as "57.637KB".
+
+    The value is divided by 1024 until it drops below 1024 or the largest
+    known unit (PB) is reached. Plain byte counts are printed unchanged
+    ("512B"), scaled values with three decimals ("1.500KB") and a size of
+    zero as "0B".
+
+    Thanks Pobux!
+    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8
+    """
+    BASE_SIZE = 1024.00
+    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]
+
+    def _fix_size(size, size_index):
+        if not size:
+            return "0"
+        elif size_index == 0:
+            return str(size)
+        else:
+            return "{:.3f}".format(size)
+
+    current_size = byte_size
+    size_index = 0
+
+    # Stop at the last unit: stepping past it would make the MEASURE lookup
+    # below raise IndexError for sizes of 1024 PB and more.
+    while current_size >= BASE_SIZE and size_index < len(MEASURE) - 1:
+        current_size = current_size / BASE_SIZE
+        size_index = size_index + 1
+
+    size = _fix_size(current_size, size_index)
+    measure = MEASURE[size_index]
+    return size + measure
+
+
+def get_media(html_file):
+    """
+    Return the unique media URLs referenced by the HTML file `html_file`.
+
+    Only <img> and <object> tags are inspected. The URL is taken from the
+    `src` attribute, or from `data` (the attribute <object> uses); tags
+    carrying neither are skipped instead of raising KeyError.
+    """
+    # Read bytes and let BeautifulSoup work out the encoding; the `with`
+    # block makes sure the file handle is closed again.
+    with open(html_file, 'rb') as f:
+        soup = BeautifulSoup(f.read(), 'html.parser')
+
+    media = set()  # duplicate media don't increase page size
+    for tag in soup(['img', 'object']):
+        url = tag.get('src') or tag.get('data')
+        if url:
+            media.add(url)
+    return sorted(media)
+
+
+def generate_metadata(path, context):
+    """
+    Append a "<file> <timestamp> <size>" line to a generated page.
+
+    `path` is the HTML file that gets parsed for media references.
+    `context` has to provide OUTPUT_PATH, output_file, SITEURL and NOW.
+    The reported size is the size of the output file, plus the size of
+    every referenced image/object found below OUTPUT_PATH, plus the
+    length of the appended line itself.
+    """
+    output_path = context['OUTPUT_PATH']
+    output_file = context['output_file']
+    site_url = context['SITEURL']
+
+    media_size = 0
+    # enumerate all media displayed on the page
+    for m in get_media(path):
+        # Strip SITEURL from the URL itself, before joining. Replacing
+        # SITEURL + '/' in the joined path removed every path separator
+        # whenever SITEURL was empty.
+        if site_url and m.startswith(site_url + '/'):
+            m = m[len(site_url) + 1:]
+        # A leading '/' would make os.path.join discard output_path.
+        m = os.path.join(output_path, m.lstrip('/'))
+
+        if os.path.isfile(m):
+            media_size = media_size + os.path.getsize(m)
+
+    current_file = os.path.join(output_path, output_file)
+    file_size = os.path.getsize(current_file) + media_size
+
+    # The appended line adds to the weight it reports, so it is formatted
+    # once to learn its length and a second time with that length included.
+    # format() also copes with a NOW value that is not a string.
+    line = '{} {} {}'
+    metadata = line.format(
+        output_file, context['NOW'], get_printable_size(file_size))
+    metadata = line.format(
+        output_file, context['NOW'],
+        get_printable_size(file_size + len(metadata)))
+
+    with open(current_file, 'a') as f:
+        f.write(metadata)
+
+    # TODO: add a way to nicely insert the meta-data into an element with id
+
+
+def register():
+    """Hook generate_metadata into Pelican's content_written signal."""
+    content_written = signals.content_written
+    content_written.connect(generate_metadata)