You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
3.1 KiB
100 lines
3.1 KiB
5 years ago
|
#!/usr/bin/env python
|
||
|
|
||
|
# -*- coding: utf-8 -*- #
|
||
|
|
||
|
# Page Meta-Data
|
||
|
# ------------------------
|
||
|
# Insert meta-data about the generated file into the resulting HTML.
|
||
|
# Copyright (C) 2019 Roel Roscam Abbing
|
||
|
#
|
||
|
# Support your local Low-Tech Magazine:
|
||
|
# https://solar.lowtechmagazine.com/donate.html
|
||
|
|
||
|
# This program is free software: you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation, either version 3 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
|
||
|
# This program is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details.
|
||
|
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||
|
|
||
|
from __future__ import unicode_literals
|
||
|
from pelican import signals
|
||
|
from bs4 import BeautifulSoup
|
||
|
import os
|
||
|
|
||
|
|
||
|
def get_printable_size(byte_size):
    """
    Format a byte count as a short human-readable string.

    Thanks Pobux!
    https://gist.github.com/Pobux/0c474672b3acd4473d459d3219675ad8

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (PB) is reached. Unscaled byte counts are printed as-is,
    scaled values with three decimals, e.g. ``1536`` -> ``"1.500KB"``.

    :param byte_size: size in bytes.
    :returns: the number immediately followed by its unit, e.g. ``"0B"``.
    """
    BASE_SIZE = 1024.00
    MEASURE = ["B", "KB", "MB", "GB", "TB", "PB"]

    def _fix_size(size, size_index):
        # A falsy size (0 / 0.0) is always rendered as a bare "0".
        if not size:
            return "0"
        elif size_index == 0:
            return str(size)
        else:
            return "{:.3f}".format(size)

    current_size = byte_size
    size_index = 0
    last_index = len(MEASURE) - 1

    # Stop scaling at the last unit. The previous guard compared against
    # len(MEASURE), which let the index run one past the end of the list and
    # raised IndexError for anything of 1024 PB or more.
    while current_size >= BASE_SIZE and size_index < last_index:
        current_size = current_size / BASE_SIZE
        size_index = size_index + 1

    size = _fix_size(current_size, size_index)
    measure = MEASURE[size_index]
    return size + measure
|
||
|
|
||
|
|
||
|
def get_media(html_file):
    """
    Return the distinct media sources referenced by an HTML file.

    Currently only ``<img>`` and ``<object>`` tags are inspected because I,
    for one, am lazy.

    :param html_file: path of the HTML file to parse.
    :returns: list of unique source strings, in no particular order.
    """
    # Use a context manager so the file handle is closed right away instead
    # of lingering until garbage collection.
    with open(html_file) as handle:
        soup = BeautifulSoup(handle.read(), 'html.parser')

    media = set()  # duplicate media don't increase page size
    for tag in soup(['img', 'object']):
        # <object> normally carries its URL in `data`, and either tag may
        # have no source at all; indexing tag['src'] would raise KeyError.
        source = tag.get('src') or tag.get('data')
        if source:
            media.add(source)

    return list(media)
|
||
|
|
||
|
|
||
|
def generate_metadata(path, context):
    """
    Append a line of meta-data (file name, build time, total page size) to a
    freshly written output file.

    The page size is the size of the file itself plus the size of every
    media file it references that can be found under the output directory,
    plus the length of the appended meta-data string.

    :param path: path of the written HTML file; it is parsed for media.
    :param context: mapping that provides ``OUTPUT_PATH``, ``output_file``,
        ``SITEURL`` and ``NOW``.
        NOTE(review): ``NOW`` is concatenated with strings, so it is
        presumably a string set in the site configuration -- confirm.
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']
    site_url = context['SITEURL']
    # Only strip SITEURL when it is non-empty; an empty SITEURL used to turn
    # the replace() call into "remove every slash from the path".
    site_prefix = site_url.rstrip('/') + '/' if site_url else ''

    media_size = 0
    # enumerate all media displayed on the page
    for m in get_media(path):
        # filter out SITEURL to prevent trouble
        if site_prefix and m.startswith(site_prefix):
            m = m[len(site_prefix):]
        # Drop leading slashes: os.path.join() would otherwise treat a
        # root-relative URL as an absolute path and discard output_path.
        m = os.path.join(output_path, m.lstrip('/'))

        # isfile() rather than exists(): directories must not be counted.
        if os.path.isfile(m):
            media_size = media_size + os.path.getsize(m)

    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file) + media_size

    prefix = output_file + ' ' + context['NOW'] + ' '
    # The appended text makes the file bigger too, so build it once to learn
    # its length and then again with that length included.
    # cursed code is cursed
    metadata = prefix + get_printable_size(file_size)
    metadata = prefix + get_printable_size(file_size + len(metadata))

    with open(current_file, 'a') as f:
        f.write(metadata)

    # TODO: add a way to nicely insert the meta-data into an element with id
|
||
|
|
||
|
|
||
|
def register():
    """
    Plugin entry point: hook generate_metadata up to the content_written
    signal so it runs for every file that gets written.
    """
    content_written = signals.content_written
    content_written.connect(generate_metadata)
|