From 6260572237fd99265ceaa4ad1588f052e8056b8b Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Thu, 27 Aug 2015 17:02:05 +0200 Subject: [PATCH] merged roels unicsss.py code --- etherdump | 25 ++++++++++++++++++++++--- linkify.py | 52 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/etherdump b/etherdump index f589d2c..96636d4 100755 --- a/etherdump +++ b/etherdump @@ -1,4 +1,7 @@ #!/usr/bin/env python +# License: AGPL +# + from __future__ import print_function # stdlib import json, sys, os, re @@ -47,9 +50,10 @@ p.add_argument("--showurls", default=False, action="store_true", help="flag to d p.add_argument("--hidepaths", default=False, action="store_true", help="flag to not display paths") p.add_argument("--pretend", default=False, action="store_true", help="flag to not actually save") p.add_argument("--add-images", default=False, action="store_true", help="flag to add image tags") +p.add_argument("--authors-css", default="authors.css", help="filename to save collected authorship css (nb: etherdump will overwrite this file!)") # TODO css from pad --- ie specify a padid for a stylesheet!!!!!! -p.add_argument("--css", default="styles.css", help="padid of stylesheet") +# p.add_argument("--css", default="styles.css", help="padid of stylesheet") args = p.parse_args() @@ -77,6 +81,7 @@ todo = args.padid done = set() count = 0 data = {} +authors_css_rules = {} data['apikey'] = info['apikey'] if args.allpads: @@ -245,7 +250,15 @@ while len(todo) > 0: # extract the style tag (with authorship colors) style = t.find(".//style") if style != None: - style = ET.tostring(style, method="html") + if args.authors_css: + for i in style.text.splitlines(): + if len(i): + selector, rule = i.split(' ',1) + authors_css_rules[selector] = rule + style = '' # strip the individual style tag from each page (only exports to authors-css file) + # nb: it's up to the template to refer to the authors-css file + else: + style = ET.tostring(style, method="html") else: style = "" # and extract the contents of the body @@ -273,4 +286,10 @@ while len(todo) > 0: if args.limit and count >= args.limit: break except TypeError: - print ("ERROR, skipping!", file=sys.stderr) \ No newline at end of file + print ("ERROR, skipping!", file=sys.stderr) + +# Write the unified CSS with authors +if args.authors_css: + with open(args.authors_css, 'w') as css: + for selector, rule in sorted(authors_css_rules.items()): + css.write(selector+' '+rule+'\n') \ No newline at end of file diff --git a/linkify.py b/linkify.py index 3609b71..981af11 100644 --- a/linkify.py +++ b/linkify.py @@ -3,39 +3,47 @@ import re, sys def strip_tags (text): - return re.sub(r"<.*?>", "", text) + return re.sub(r"<.*?>", "", text) def urlify (t, ext=".html"): - return t.replace(" ", "_") + ext + return t.replace(" ", "_") + ext def filename_to_padid (t): - t = t.replace("_", " ") - t = re.sub(r"\.html$", "", t) - return t + t = t.replace("_", " ") + t = re.sub(r"\.html$", "", t) + return t + +def normalize_pad_name (n): + if '?' in n: + n = n.split('?', 1)[0] + if '/' in n: + n = n.split('/', 1)[0] + return n def linkify (src, urlify=urlify): - collect = [] + collect = [] - def s (m): - contents = strip_tags(m.group(1)) - collect.append(contents) - link = urlify(contents) - # link = link.split("?", 1)[0] - return "[[{1}]]".format(link, contents) + def s (m): + contents = strip_tags(m.group(1)) + contents = normalize_pad_name(contents) + collect.append(contents) + link = urlify(contents) + # link = link.split("?", 1)[0] + return "[[{1}]]".format(link, contents) - # src = re.sub(r"\[\[([\w_\- ,]+?)\]\]", s, src) - ## question marks are ignored by etherpad, so split/strip it - ## strip slashes as well!! (/timeslider) - src = re.sub(r"\[\[(.+?)(\/.*)?(\?.*)?\]\]", s, src) - return (src, collect) + # src = re.sub(r"\[\[([\w_\- ,]+?)\]\]", s, src) + ## question marks are ignored by etherpad, so split/strip it + ## strip slashes as well!! (/timeslider) + src = re.sub(r"\[\[(.+?)\]\]", s, src) + return (src, collect) if __name__ == "__main__": - src = sys.stdin.read() - src, links = linkify(src) + src = sys.stdin.read() + src, links = linkify(src) - for l in links: - print (l) + for l in links: + print (l) - print (src) + print (src)