initial commit
This commit is contained in:
commit
83d57027b1
9
README.md
Normal file
9
README.md
Normal file
@ -0,0 +1,9 @@
|
||||
# Page Meta-Data
|
||||
|
||||
A plugin to add meta-data for each generated page in the format:
|
||||
|
||||
`index.html Tue 12 Nov 2019 03:52:15 PM CET 57.637KB`
|
||||
|
||||
It calculates the weight of the HTML page including all image media. A time stamp reflecting the date of generation is also added.
|
||||
|
||||
The data is appended at the end of the HTML, so nothing fancy for now.
|
1
__init__.py
Normal file
1
__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .page_metadata import *
|
99
page_metadata.py
Normal file
99
page_metadata.py
Normal file
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# -*- coding: utf-8 -*- #
|
||||
|
||||
# Page Meta-Data
|
||||
# ------------------------
|
||||
# Insert meta-data about the generated file into the resulting HTML.
|
||||
# Copyright (C) 2019 Roel Roscam Abbing
|
||||
#
|
||||
# Support your local Low-Tech Magazine:
|
||||
# https://solar.lowtechmagazine.com/donate.html
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from pelican import signals
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
|
||||
|
||||
def get_printable_size(byte_size):
    """
    Format a byte count as a short human-readable string, e.g. ``57.637KB``.

    Thanks Pobux!
    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8

    The value is divided by 1024 until it is below 1024 or the largest
    known unit (PB) has been reached. Sizes beyond the unit table are
    therefore reported in PB instead of raising an IndexError.

    :param byte_size: size in bytes (int or float).
    :return: the number followed directly by its unit: ``"0B"`` for a
        zero size, the plain value for sizes below 1024 (``"512B"``) and
        three decimals for every larger unit (``"1.000KB"``).
    """
    BASE_SIZE = 1024.00
    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]

    def _fix_size(size, size_index):
        # Zero is printed without decimals whatever the unit.
        if not size:
            return "0"
        # Plain bytes are shown as passed in; scaled values get 3 decimals.
        elif size_index == 0:
            return str(size)
        else:
            return "{:.3f}".format(size)

    current_size = byte_size
    size_index = 0
    # Never step past the last unit, otherwise MEASURE[size_index] below
    # would be out of range for sizes >= 1024 ** 6.
    last_index = len(MEASURE) - 1

    while current_size >= BASE_SIZE and size_index < last_index:
        current_size = current_size / BASE_SIZE
        size_index = size_index + 1

    return _fix_size(current_size, size_index) + MEASURE[size_index]
|
||||
|
||||
|
||||
def get_media(html_file):
    """
    Return the distinct media URLs referenced by an HTML file.

    Currently only images (``<img>``) and ``<object>`` elements are
    considered.

    :param html_file: path of the HTML file to parse.
    :return: list of unique URL strings, in no particular order. Tags
        that carry neither a ``src`` nor a ``data`` attribute are skipped.
    """
    # Close the handle deterministically instead of leaking it.
    with open(html_file) as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # A set, because duplicate media don't increase page size.
    media = set()
    for tag in soup(['img', 'object']):
        # <img> points at its resource with ``src``, <object> with
        # ``data``; .get() avoids a KeyError when the attribute is absent.
        source = tag.get('src') or tag.get('data')
        if source:
            media.add(source)
    return list(media)
|
||||
|
||||
|
||||
def _local_media_path(src, output_path, siteurl):
    """Map a media URL taken from the HTML to a path below output_path."""
    # Only strip SITEURL when it is set: with an empty SITEURL a blanket
    # replace of SITEURL + '/' would remove every slash from the path.
    if siteurl and src.startswith(siteurl + '/'):
        src = src[len(siteurl) + 1:]
    # A leading '/' would make os.path.join discard output_path.
    return os.path.join(output_path, src.lstrip('/'))


def generate_metadata(path, context):
    """
    Append ``<output_file> <NOW> <size>`` to the end of a generated page.

    The size is the size of the written HTML file plus the size of every
    distinct media file it references that can be found on disk below
    ``OUTPUT_PATH``, plus the length of the appended line itself.

    :param path: path of the HTML file that was just written; it is
        parsed for media references.
    :param context: mapping providing ``OUTPUT_PATH``, ``output_file``,
        ``SITEURL`` and ``NOW``.
        NOTE(review): ``NOW`` is concatenated as-is, so it has to be a
        string -- presumably defined in the site settings; confirm.
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']
    siteurl = context['SITEURL']

    # Sum all media displayed on the page; anything that does not resolve
    # to a regular file (external URLs, missing files, directories) is
    # ignored.
    media_size = 0
    for src in get_media(path):
        media_file = _local_media_path(src, output_path, siteurl)
        if os.path.isfile(media_file):
            media_size = media_size + os.path.getsize(media_file)

    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file) + media_size

    prefix = output_file + ' ' + context['NOW'] + ' '
    # The appended line adds to the page weight as well: build it once to
    # learn its length, then rebuild it with that length included.
    metadata = prefix + get_printable_size(file_size)
    metadata = prefix + get_printable_size(file_size + len(metadata))

    with open(current_file, 'a') as f:
        f.write(metadata)
|
||||
|
||||
# TODO: add a way to nicely insert the meta-data into an element with id
|
||||
|
||||
|
||||
def register():
    """Connect generate_metadata to Pelican's content_written signal."""
    content_written = signals.content_written
    content_written.connect(generate_metadata)
|
Loading…
Reference in New Issue
Block a user