Count all media on the page, including background images; handle the "i18n_subsites" plugin
This commit is contained in:
parent
83d57027b1
commit
5e96200f71
@ -43,7 +43,7 @@ def get_printable_size(byte_size):
|
||||
elif size_index == 0:
|
||||
return str(size)
|
||||
else:
|
||||
return "{:.3f}".format(size)
|
||||
return "{:.2f}".format(size)
|
||||
|
||||
current_size = byte_size
|
||||
size_index = 0
|
||||
@ -56,6 +56,13 @@ def get_printable_size(byte_size):
|
||||
measure = MEASURE[size_index]
|
||||
return size + measure
|
||||
|
||||
def get_assets(soup):
    """Collect the URLs of page assets referenced by <link> tags.

    Looks at <link> elements whose rel is one of apple-touch-icon, icon or
    stylesheet, strips any query string from the href, and returns the
    de-duplicated list of asset paths.

    :param soup: a BeautifulSoup document for the rendered page
    :returns: list of unique asset URLs (order of first appearance preserved)
    """
    assets = []
    for a in soup.findAll('link', {'rel': ['apple-touch-icon', 'icon', 'stylesheet']}):
        href = a.get('href')
        # A <link> without href is invalid but possible in the wild; skip it
        # instead of raising KeyError.
        if href is None:
            continue
        # Drop the query string ("style.css?v=3") so cache-busting suffixes
        # don't create duplicate entries for the same file on disk.
        href = href.split('?')[0]
        if href not in assets:
            assets.append(href)
    return assets
|
||||
|
||||
def get_media(html_file):
    """Enumerate all media referenced by a rendered HTML page.

    Parses the file at *html_file* and gathers:
      * src attributes of <img> and <object> elements,
      * background-image URLs embedded in the inline style of
        ``div.featured-img`` elements (``url('...')`` syntax),
      * page assets (icons, stylesheets) found by :func:`get_assets`.

    :param html_file: path to the rendered HTML file
    :returns: tuple ``(media, soup)`` — the de-duplicated list of media URLs
        and the parsed BeautifulSoup document, so callers can reuse the soup
        without re-parsing.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original `open(path).read()` relied on GC to close it).
    with open(html_file) as fh:
        soup = BeautifulSoup(fh.read(), 'html.parser')
    media = []

    for img in soup(['img', 'object']):
        # NOTE(review): <object> elements normally use a `data` attribute, not
        # `src` — this will raise KeyError on such tags. TODO confirm against
        # the theme's actual markup before changing behavior.
        media.append(img['src'])

    # Background images declared inline as style="... url('...') ..." on the
    # theme's featured-image divs.
    featured_images = soup.findAll('div', {'class': 'featured-img'})
    for fi in featured_images:
        fi = fi['style']
        start = fi.find("url('")
        end = fi.find("');")
        url = fi[start + len("url('"):end]
        media.append(url)

    assets = get_assets(soup)
    media = list(set(media + assets))  # duplicate media don't increase page size
    return media, soup
|
||||
|
||||
def generate_metadata(path, context):
    """Compute the total on-disk size of a page and inject it into the page.

    Connected to Pelican's ``content_written`` signal. Sums the size of the
    written HTML file plus every media file it references, formats the total
    with :func:`get_printable_size`, and writes it into the page via
    :func:`insert_metadata`.

    :param path: filesystem path of the HTML file that was just written
    :param context: Pelican writer context (OUTPUT_PATH, SITEURL, PLUGINS, ...)
    """
    output_path = context['OUTPUT_PATH']
    output_file = context['output_file']
    siteurl = context['SITEURL']
    plugins = context['PLUGINS']
    subsites = False

    # With i18n_subsites each language renders into its own subdirectory;
    # shared media lives in the language-agnostic output root, so strip the
    # language segment from the paths before resolving media files.
    if 'i18n_subsites' in plugins:
        subsites = True
        lang = context['DEFAULT_LANG']
        general_output_path = output_path.replace(lang, '').strip('/')
        # NOTE(review): `siteurl` is stripped here but the loop below still
        # uses context['SITEURL'] — presumably intentional, TODO confirm.
        siteurl = siteurl.replace(lang, '').strip('/')

    media_size = 0

    media, soup = get_media(path)  # reuse the same soup to limit calculation

    for m in media:
        # filter out SITEURL to prevent trouble
        file_name = m.replace(context['SITEURL'] + '/', '')

        # join output path to file, need to strip any leading slash for os.path
        if subsites:
            m = os.path.join(general_output_path, file_name.strip('/'))
        else:
            m = os.path.join(output_path, file_name.strip('/'))

        # External media (absolute URLs to other hosts) won't exist locally
        # and simply doesn't count toward the page size.
        if os.path.exists(m):
            media_size = media_size + os.path.getsize(m)

    current_file = os.path.join(output_path, output_file)
    file_size = os.path.getsize(current_file)

    file_size = file_size + media_size
    # The inserted metadata itself enlarges the file, so measure once to learn
    # the metadata's length, then re-format including that length.
    metadata = get_printable_size(file_size)
    metadata = get_printable_size(file_size + len(metadata))  # cursed code is cursed

    # TODO: add a way to nicely insert the meta-data into an element with id
    insert_metadata(path, metadata, soup)
|
||||
|
||||
def insert_metadata(output_file, metadata, soup):
    """Write *metadata* into the page's size placeholder and save the page.

    Finds the ``<div id="page-size">`` element in *soup*; if present, sets its
    text to *metadata* and rewrites *output_file* with the updated document.
    Pages without the placeholder are left untouched.
    """
    target = soup.find('div', {'id': 'page-size'})
    if target is None:
        # Theme doesn't provide the placeholder — nothing to do.
        return
    target.string = '{}'.format(metadata)
    with open(output_file, 'w') as f:
        f.write(str(soup))
|
||||
|
||||
def register():
    """Hook the plugin into Pelican.

    Runs :func:`generate_metadata` after each content file has been written,
    so the final HTML exists on disk when its size is measured.
    """
    signals.content_written.connect(generate_metadata)
|
||||
|
Loading…
Reference in New Issue
Block a user