#!/usr/bin/env python # -*- coding: utf-8 -*- # # Page Meta-Data # ------------------------ # Insert meta-data about the generated file into the resulting HMTL. # Copyright (C) 2019 Roel Roscam Abbing # # Support your local Low-Tech Magazine: # https://solar.lowtechmagazine.com/donate.html # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . from __future__ import unicode_literals from pelican import signals from bs4 import BeautifulSoup import os def get_printable_size(byte_size): """ Thanks Pobux! https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8 """ BASE_SIZE = 1024.00 MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"] def _fix_size(size, size_index): if not size: return "0" elif size_index == 0: return str(size) else: return "{:.3f}".format(size) current_size = byte_size size_index = 0 while current_size >= BASE_SIZE and len(MEASURE) != size_index: current_size = current_size / BASE_SIZE size_index = size_index + 1 size = _fix_size(current_size, size_index) measure = MEASURE[size_index] return size + measure def get_media(html_file): """ Currently only images because I, for one, am lazy. """ html_file = open(html_file).read() soup = BeautifulSoup(html_file, 'html.parser') media = [] for img in soup(['img', 'object']): media.append(img['src']) media = list(set(media)) # duplicate media don't increase page size return media def generate_metadata(path, context): output_path = context['OUTPUT_PATH'] output_file = context['output_file'] media_size = 0 # enumerate all media displayed on the page for m in get_media(path): m = os.path.join(output_path, m) # filter out SITEURL to prevent trouble m = m.replace(context['SITEURL']+'/', '') if os.path.exists(m): media_size = media_size + os.path.getsize(m) current_file = os.path.join(output_path, output_file) file_size = os.path.getsize(current_file) with open(current_file, 'a') as f: file_size = file_size + media_size metadata = output_file + ' ' + context['NOW'] + ' ' + get_printable_size(file_size) metadata = output_file + ' ' + context['NOW'] + ' ' + get_printable_size(file_size+len(metadata)) # cursed code is cursed f.write(metadata) # TODO: add a way to nicely insert the meta-data into an element with id def register(): signals.content_written.connect(generate_metadata)