rra
5 years ago
commit
83d57027b1
3 changed files with 109 additions and 0 deletions
@ -0,0 +1,9 @@ |
|||
# Page Meta-Data |
|||
|
|||
A plugin to add meta-data for each generated page in the format: |
|||
|
|||
`index.html Tue 12 Nov 2019 03:52:15 PM CET 57.637KB` |
|||
|
|||
It calculates the weight of the HTML page including all image media. A time stamp reflecting the date of generation is also added. |
|||
|
|||
The data is appended at the end of the HTML, so nothing fancy for now. |
@ -0,0 +1 @@ |
|||
from .page_metadata import * |
@ -0,0 +1,99 @@ |
|||
#!/usr/bin/env python |
|||
|
|||
# -*- coding: utf-8 -*- # |
|||
|
|||
# Page Meta-Data |
|||
# ------------------------ |
|||
# Insert meta-data about the generated file into the resulting HTML. |
|||
# Copyright (C) 2019 Roel Roscam Abbing |
|||
# |
|||
# Support your local Low-Tech Magazine: |
|||
# https://solar.lowtechmagazine.com/donate.html |
|||
|
|||
# This program is free software: you can redistribute it and/or modify |
|||
# it under the terms of the GNU General Public License as published by |
|||
# the Free Software Foundation, either version 3 of the License, or |
|||
# (at your option) any later version. |
|||
|
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU General Public License for more details. |
|||
|
|||
# You should have received a copy of the GNU General Public License |
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|||
|
|||
from __future__ import unicode_literals |
|||
from pelican import signals |
|||
from bs4 import BeautifulSoup |
|||
import os |
|||
|
|||
|
|||
def get_printable_size(byte_size):
    """
    Format a byte count as a short human-readable string, e.g. ``57.637KB``.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (PB) is reached. Values still expressed in bytes are printed
    as-is; scaled values are printed with three decimals. A zero or otherwise
    falsy size is printed as ``0``.

    Thanks Pobux!
    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8

    :param byte_size: size in bytes (a non-negative number).
    :returns: the formatted size immediately followed by its unit.
    """
    BASE_SIZE = 1024.00
    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]

    def _fix_size(size, size_index):
        if not size:
            return "0"
        elif size_index == 0:
            # Still in bytes: keep the caller's number untouched.
            return str(size)
        else:
            return "{:.3f}".format(size)

    current_size = byte_size
    size_index = 0
    last_index = len(MEASURE) - 1

    # Stop at the last unit: stepping past it would index outside MEASURE
    # (IndexError) for anything of 1024 PB or more.
    while current_size >= BASE_SIZE and size_index < last_index:
        current_size = current_size / BASE_SIZE
        size_index = size_index + 1

    return _fix_size(current_size, size_index) + MEASURE[size_index]
|||
|
|||
|
|||
def get_media(html_file):
    """
    Return the unique media URLs referenced by an HTML file.

    Only ``<img>`` and ``<object>`` elements are inspected. Each URL is
    reported once, in order of first appearance, because repeated media do
    not increase the page size. Elements that carry no URL are skipped.

    :param html_file: path of the HTML file to scan.
    :returns: list of URL strings exactly as written in the markup.
    """
    # Read raw bytes and let BeautifulSoup work out the encoding, so the
    # result does not depend on the locale's default text encoding. The
    # ``with`` block also guarantees the file handle is closed.
    with open(html_file, 'rb') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    media = []
    seen = set()
    for tag in soup(['img', 'object']):
        # <img> names its resource in ``src``; <object> uses ``data``.
        # ``.get`` avoids a KeyError when the attribute is absent.
        src = tag.get('src') or tag.get('data')
        if src and src not in seen:
            seen.add(src)
            media.append(src)
    return media
|||
|
|||
|
|||
def generate_metadata(path, context):
    """
    Append a ``<file> <timestamp> <size>`` line to a generated page.

    The reported size is the size of the written HTML file plus the size of
    every local media file it references (see ``get_media``), plus the
    length of the appended line itself.

    :param path: path of the HTML file that was just written; it is parsed
        to discover the media the page references.
    :param context: mapping providing ``OUTPUT_PATH``, ``output_file``,
        ``SITEURL`` and ``NOW``. ``NOW`` is concatenated into the line, so it
        is assumed to already be a string -- TODO confirm where it is set.
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']

    # Only strip SITEURL when it is non-empty: with an empty SITEURL the
    # prefix would be a bare '/', and removing that everywhere would delete
    # every path separator.
    site_url = context.get('SITEURL') or ''
    site_prefix = site_url.rstrip('/') + '/' if site_url else ''

    media_size = 0
    # enumerate all media displayed on the page
    for src in get_media(path):
        if site_prefix and src.startswith(site_prefix):
            src = src[len(site_prefix):]
        # A leading '/' would make os.path.join discard output_path, so
        # treat root-relative URLs as relative to the output directory.
        media_file = os.path.join(output_path, src.lstrip('/'))
        # Anything that is not a local file (external URL, directory) is
        # left out of the total.
        if os.path.isfile(media_file):
            media_size = media_size + os.path.getsize(media_file)

    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file) + media_size

    prefix = output_file + ' ' + context['NOW'] + ' '
    # The line is built twice: the first pass only measures how long the
    # line is, so that the final size can account for the line itself.
    first_pass = prefix + get_printable_size(file_size)
    metadata = prefix + get_printable_size(file_size + len(first_pass))

    with open(current_file, 'a') as f:
        f.write(metadata)
|||
|
|||
# TODO: add a way to nicely insert the meta-data into an element with id |
|||
|
|||
|
|||
def register():
    """
    Plugin entry point: subscribe ``generate_metadata`` to the
    ``content_written`` signal.
    """
    content_written = signals.content_written
    content_written.connect(generate_metadata)
Loading…
Reference in new issue