|
|
@@ -43,7 +43,7 @@ def get_printable_size(byte_size):
|
|
|
elif size_index == 0: |
|
|
|
return str(size) |
|
|
|
else: |
|
|
|
return "{:.3f}".format(size) |
|
|
|
return "{:.2f}".format(size) |
|
|
|
|
|
|
|
current_size = byte_size |
|
|
|
size_index = 0 |
|
|
@@ -56,6 +56,13 @@ def get_printable_size(byte_size):
|
|
|
measure = MEASURE[size_index] |
|
|
|
return size + measure |
|
|
|
|
|
|
|
def get_assets(soup):
    """Return the unique asset hrefs linked from *soup*.

    Collects ``<link>`` elements whose ``rel`` is ``apple-touch-icon``,
    ``icon`` or ``stylesheet``, strips any query string from the href,
    and de-duplicates while preserving first-seen order.
    """
    seen = []
    links = soup.findAll('link', {'rel': ['apple-touch-icon', 'icon', 'stylesheet']})
    for link in links:
        # drop cache-busting query strings so duplicates collapse
        href = link['href'].split('?')[0]
        if href not in seen:
            seen.append(href)
    return seen
|
|
|
|
|
|
|
def get_media(html_file):
    """Collect the media and asset URLs referenced by an HTML file.

    Parses *html_file* with BeautifulSoup and gathers:
    - ``src`` of every ``<img>`` and ``<object>`` element
      (NOTE(review): ``<object>`` normally uses ``data`` rather than
      ``src`` — a src-less element would raise KeyError; confirm against
      the generated templates),
    - background-image URLs pulled out of the inline ``style`` of
      ``<div class="featured-img">`` elements,
    - linked assets from get_assets() (icons and stylesheets).

    Returns a tuple ``(media, soup)`` where *media* is a de-duplicated
    list of URLs and *soup* is the parsed document, so callers can reuse
    the parse instead of reading the file again.
    """
    # use a context manager so the file handle is not leaked
    with open(html_file) as f:
        html = f.read()
    soup = BeautifulSoup(html, 'html.parser')
    media = []

    for img in soup(['img', 'object']):
        media.append(img['src'])

    featured_images = soup.findAll('div', {'class':'featured-img'})
    for fi in featured_images:
        fi = fi['style']
        # extract the URL between "url('" and "');" in the inline style
        start = fi.find("url('")
        end = fi.find("');")
        url = fi[start+len("url('"):end]
        media.append(url)

    assets = get_assets(soup)
    media = list(set(media+assets)) # duplicate media don't increase page size
    return media, soup
|
|
|
|
|
|
|
def generate_metadata(path, context):
    """Compute the total on-disk size of a written page and insert it.

    Connected to Pelican's ``content_written`` signal. Sums the size of
    the written HTML file plus every media file and asset it references
    (duplicates counted once), formats the total with
    get_printable_size(), and hands the result to insert_metadata() so
    it lands in the page's ``<div id="page-size">`` placeholder.

    *path* is the path of the file just written; *context* is Pelican's
    settings/context dict (OUTPUT_PATH, output_file, SITEURL, PLUGINS,
    and DEFAULT_LANG when i18n_subsites is active).
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']
    siteurl = context['SITEURL']
    plugins = context['PLUGINS']
    subsites = False

    if 'i18n_subsites' in plugins:
        # sub-site output lives under a language folder; media is shared
        # from the top-level output directory, so strip the lang segment
        subsites = True
        lang = context['DEFAULT_LANG']
        general_output_path = output_path.replace(lang, '').strip('/')
        siteurl = siteurl.replace(lang, '').strip('/')

    media_size = 0
    # enumerate all media displayed on the page
    media, soup = get_media(path) # reuse the same soup to limit calculation

    for m in media:
        # filter out SITEURL to prevent trouble
        file_name = m.replace(context['SITEURL']+'/', '')

        # join output path to file, need to strip any leading slash for os.path
        if subsites:
            m = os.path.join(general_output_path, file_name.strip('/'))
        else:
            m = os.path.join(output_path, file_name.strip('/'))

        # external or missing media simply doesn't add to the total
        if os.path.exists(m):
            media_size = media_size + os.path.getsize(m)

    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file)

    file_size = file_size + media_size
    # second pass accounts for the bytes the metadata string itself adds
    metadata = get_printable_size(file_size)
    metadata = get_printable_size(file_size+len(metadata)) # cursed code is cursed

    # TODO: add a way to nicely insert the meta-data into an element with id
    insert_metadata(path, metadata, soup)
|
|
|
|
|
|
|
def insert_metadata(output_file, metadata, soup):
    """Write *metadata* into the page's size placeholder, if one exists.

    Looks for ``<div id="page-size">`` in *soup*; when found (and
    truthy — note bs4 tags with no children are falsy, so an empty
    placeholder is skipped exactly as before), its text content is
    replaced with *metadata* and the whole document is rewritten to
    *output_file*. Pages without the placeholder are left untouched.
    """
    placeholder = soup.find('div', {'id':'page-size'})
    if placeholder:
        with open(output_file,'w') as out:
            placeholder.string = '{}'.format(metadata)
            out.write(str(soup))
|
|
|
|
|
|
|
def register():
    # Pelican plugin entry point: run generate_metadata(path, context)
    # every time a content file has been written to disk.
    signals.content_written.connect(generate_metadata)
|
|
|