commit 83d57027b1135dff385492ddcb009cb6f50a0f8a Author: rra Date: Tue Nov 12 16:23:09 2019 +0100 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..60f1574 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +#Page Meta-Data + +A plugin to add meta-data for each generated page in the format: + +`index.html Tue 12 Nov 2019 03:52:15 PM CET 57.637KB` + +It calculates the weight of the HTML page including all image media. A time stamp reflecting the date of generation is also added. + +The data is appended at the end of the HTML, so nothing fancy for now. \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..0c815c3 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +from .page_metadata import * \ No newline at end of file diff --git a/page_metadata.py b/page_metadata.py new file mode 100644 index 0000000..3e83cf5 --- /dev/null +++ b/page_metadata.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- # + +# Page Meta-Data +# ------------------------ +# Insert meta-data about the generated file into the resulting HTML. +# Copyright (C) 2019 Roel Roscam Abbing +# +# Support your local Low-Tech Magazine: +# https://solar.lowtechmagazine.com/donate.html + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +from __future__ import unicode_literals +from pelican import signals +from bs4 import BeautifulSoup +import os + + +def get_printable_size(byte_size): + """ + Thanks Pobux! + https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8 + """ + BASE_SIZE = 1024.00 + MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"] + + def _fix_size(size, size_index): + if not size: + return "0" + elif size_index == 0: + return str(size) + else: + return "{:.3f}".format(size) + + current_size = byte_size + size_index = 0 + + while current_size >= BASE_SIZE and len(MEASURE) != size_index: + current_size = current_size / BASE_SIZE + size_index = size_index + 1 + + size = _fix_size(current_size, size_index) + measure = MEASURE[size_index] + return size + measure + + +def get_media(html_file): + """ + Currently only images because I, for one, am lazy. + """ + html_file = open(html_file).read() + soup = BeautifulSoup(html_file, 'html.parser') + media = [] + for img in soup(['img', 'object']): + media.append(img['src']) + media = list(set(media)) # duplicate media don't increase page size + return media + + +def generate_metadata(path, context): + output_path = context['OUTPUT_PATH'] + output_file = context['output_file'] + media_size = 0 + # enumerate all media displayed on the page + for m in get_media(path): + m = os.path.join(output_path, m) + # filter out SITEURL to prevent trouble + m = m.replace(context['SITEURL']+'/', '') + + if os.path.exists(m): + media_size = media_size + os.path.getsize(m) + + current_file = os.path.join(output_path, output_file) + file_size = os.path.getsize(current_file) + + with open(current_file, 'a') as f: + file_size = file_size + media_size + metadata = output_file + ' ' + context['NOW'] + ' ' + get_printable_size(file_size) + metadata = output_file + ' ' + context['NOW'] + ' ' + get_printable_size(file_size+len(metadata)) # cursed code is cursed + f.write(metadata) + + # TODO: add a way to nicely insert the meta-data into an element with id + + +def 
register(): + signals.content_written.connect(generate_metadata)