|
|
@ -1,4 +1,7 @@ |
|
|
|
#!/usr/bin/env python |
|
|
|
# License: AGPL |
|
|
|
# |
|
|
|
|
|
|
|
from __future__ import print_function |
|
|
|
# stdlib |
|
|
|
import json, sys, os, re |
|
|
@ -47,9 +50,10 @@ p.add_argument("--showurls", default=False, action="store_true", help="flag to d |
|
|
|
# Boolean switches: each is False unless the flag is given on the command line. |
p.add_argument("--hidepaths", default=False, action="store_true", help="flag to not display paths") |
|
|
|
p.add_argument("--pretend", default=False, action="store_true", help="flag to not actually save") |
|
|
|
p.add_argument("--add-images", default=False, action="store_true", help="flag to add image tags") |
|
|
|
# Besides naming the output file, this value is tested for truthiness further down: |
# an empty string switches off both the per-page rule collection and the final write. |
p.add_argument("--authors-css", default="authors.css", help="filename to save collected authorship css (nb: etherdump will overwrite this file!)") |
|
|
|
|
|
|
|
# TODO: take the css from a pad, i.e. allow a padid to be given for the stylesheet. |
|
|
|
# NOTE(review): the same --css option appears here both active and commented out; |
# confirm which form is intended. The help text says "padid" while the default looks like a filename. |
p.add_argument("--css", default="styles.css", help="padid of stylesheet") |
|
|
|
# p.add_argument("--css", default="styles.css", help="padid of stylesheet") |
|
|
|
|
|
|
|
|
|
|
|
# Parse the command line; every option above is read from args from here on. |
args = p.parse_args() |
|
|
@ -77,6 +81,7 @@ todo = args.padid |
|
|
|
# Working state for the processing loop. |
# NOTE(review): presumably the pad ids already handled; its use is not visible here, confirm. |
done = set() |
|
|
|
# Compared against args.limit further down to break out of the loop early. |
count = 0 |
|
|
|
# NOTE(review): looks like the parameter dict for API requests (it receives the apikey below); confirm. |
data = {} |
|
|
|
# Maps css selector -> rule text; filled from each page's <style> tag and |
# written to the authors-css file afterwards. |
authors_css_rules = {} |
|
|
|
data['apikey'] = info['apikey'] |
|
|
|
|
|
|
|
if args.allpads: |
|
|
@ -245,7 +250,15 @@ while len(todo) > 0: |
|
|
|
# Extract the <style> tag (it carries the authorship colors).
# Afterwards `style` is always a string: the serialized tag, or "" when the
# page has no style tag or its rules were moved to the authors-css file.
style = t.find(".//style")
if style is None:
    style = ""
elif args.authors_css:
    # Collect the rules for the shared authors-css file. This reads the
    # element's text, so the element must NOT be serialized to a string
    # first. The text is None for an empty tag, hence the `or ""`.
    for line in (style.text or "").splitlines():
        # Each rule line is "<selector> <rule>". Lines that are blank or
        # contain no space are skipped instead of raising ValueError.
        selector, sep, rule = line.strip().partition(" ")
        if sep:
            authors_css_rules[selector] = rule
    # strip the individual style tag from each page (only exports to authors-css file)
    # nb: it's up to the template to refer to the authors-css file
    style = ""
else:
    style = ET.tostring(style, method="html")
|
|
|
# and extract the contents of the body |
|
|
@ -273,4 +286,10 @@ while len(todo) > 0: |
|
|
|
if args.limit and count >= args.limit: |
|
|
|
break |
|
|
|
except TypeError: |
|
|
|
print ("ERROR, skipping!", file=sys.stderr) |
|
|
|
print ("ERROR, skipping!", file=sys.stderr) |
|
|
|
|
|
|
|
# Write the unified CSS with authors: one "<selector> <rule>" line per
# collected rule, sorted so the file content is deterministic.
# nb: an existing file at args.authors_css is overwritten.
# Nothing is written when --authors-css is empty or when --pretend is given
# (the latter is documented as "flag to not actually save").
if args.authors_css and not args.pretend:
    with open(args.authors_css, 'w') as css:
        css.writelines(
            selector + ' ' + rule + '\n'
            for selector, rule in sorted(authors_css_rules.items())
        )