From b87674e0509c188630003dd322584fab548daf2c Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Thu, 23 Jul 2015 18:34:36 +0200 Subject: [PATCH] updated dump_html --- README.md | 4 +-- createDiffHTML.py | 2 +- dump_html.py | 36 +++++++++++++++++++++-- etherdump | 74 +++++++++++------------------------------------ listAllPads.py | 8 ++++- 5 files changed, 61 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 9a2754d..b7c9b58 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains cp padinfo.sample.json padinfo.json nano padinfo.json -And then... +And then for instance: - etherdump --padinfo padinfo.json list + etherdump --padinfo padinfo.json listpads listpads diff --git a/createDiffHTML.py b/createDiffHTML.py index cf5ecbb..3a224fe 100755 --- a/createDiffHTML.py +++ b/createDiffHTML.py @@ -6,7 +6,7 @@ from urllib import urlencode from urllib2 import urlopen, HTTPError, URLError p = ArgumentParser("") -p.add_argument("padid") +p.add_argument("padid", help="the padid") p.add_argument("--startrev", type=int, default=0, help="starting revision") p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last") p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json") diff --git a/dump_html.py b/dump_html.py index 0a380a3..87b20ff 100755 --- a/dump_html.py +++ b/dump_html.py @@ -2,12 +2,14 @@ from __future__ import print_function from argparse import ArgumentParser import json, sys, os +from datetime import datetime +import html5lib from urllib import urlencode from urllib2 import urlopen, HTTPError, URLError from xml.etree import cElementTree as ET -import html5lib from trim import trim_removed_spans, contents from linkify import linkify, urlify +import jinja2 p = ArgumentParser("") @@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi p.add_argument("--path", default="output", help="path to save files, default: output") p.add_argument("--verbose", default=False, action="store_true") p.add_argument("--limit", type=int, default=None) +p.add_argument("--templates", default="templates") +p.add_argument("--template", default="pad_html.html") args = p.parse_args() +def get_template_env (tpath=None): + paths = [] + if tpath and os.path.isdir(tpath): + paths.append(tpath) + # paths.append(TEMPLATES_PATH) + loader = jinja2.FileSystemLoader(paths) + env = jinja2.Environment(loader=loader) + return env + with open(args.padinfo) as f: info = json.load(f) apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) @@ -26,6 +39,9 @@ todo = [args.padid] done = set() count = 0 +env = get_template_env(args.templates) +template = env.get_template(args.template) + while len(todo) > 0: padid = todo[0] todo = todo[1:] @@ -62,7 +78,23 @@ while len(todo) > 0: except OSError: pass with open(out, "w") as f: - f.write(html.encode("utf-8")) + t = html5lib.parse(html, namespaceHTMLElements=False) + style = t.find(".//style") + if style != None: + style = ET.tostring(style, method="html") + else: + style = "" + body = t.find(".//body") + html = contents(body) + + # f.write(html.encode("utf-8")) + f.write(template.render( + html = html, + style = style, + revision = total_revisions, + padid = padid, + timestamp = datetime.now() + ).encode("utf-8")) count += 1 if args.limit and count >= args.limit: diff --git a/etherdump b/etherdump index d67af57..c8625b5 100755 --- a/etherdump +++ b/etherdump @@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET from urllib import urlencode from urlparse import urljoin from datetime import datetime +from padserver import PadServer + PADINFO_DEFAULTS = { "hostname": "", @@ -34,63 +36,6 @@ def content(tag): else: return tag.text + u''.join(ET.tostring(e) for e in tag) -class PadServer (object): - def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False): - self.hostname = hostname - if secure: - self.protocol = "https" - else: - self.protocol = "http" - - self.apiurl = self.protocol+"://"+hostname - if port: - self.apiurl += ":{0}".format(port) - self.apiurl += "{0}{1}/".format(apipath, apiversion) - self.apikey = apikey - - def listAllPads (self): - data = {'apikey': self.apikey} - url = self.apiurl+'listAllPads?'+urlencode(data) - return json.load(urlopen(url))['data']['padIDs'] - - def listAllGroups (self): - data = {'apikey': self.apikey} - url = self.apiurl+'listAllGroups?'+urlencode(data) - return json.load(urlopen(url))['data']['groupIDs'] - - def getPadText (self, padID): - data = {'apikey': self.apikey, 'padID': padID.encode("utf-8")} - return json.load(urlopen(self.apiurl+'getText?'+urlencode(data)))['data']['text'] - - def getPadHTML (self, padID): - data = {'apikey': self.apikey, 'padID': padID.encode("utf-8")} - return json.load(urlopen(self.apiurl+'getHTML?'+urlencode(data)))['data']['html'] - - def getPadLastEdited (self, padID): - data = {'apikey': self.apikey, 'padID': padID.encode("utf-8")} - raw = json.load(urlopen(self.apiurl+'getLastEdited?'+urlencode(data)))['data']['lastEdited'] - try: - return datetime.fromtimestamp(int(raw)/1000) - except TypeError as e: - return None - - def getPadURL (self, padID, groupinfo=None): - group, name = pad_split_group(padID) - if group: - gid = group - if gid.startswith("g."): - gid = gid[2:] - if groupinfo: - ginfo = groupinfo.get(gid) - if ginfo: - groupID = ginfo.get("id", 0) - else: - groupID = 0 - else: - groupID = 0 - return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID - else: - return self.protocol+"://"+self.hostname+"/public_pad/"+padID def get_template_env (tpath=None): import jinja2 @@ -291,6 +236,8 @@ if __name__ == "__main__": parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file') parser.add_argument('--output', default=None, help='(index) path for output (default stdout)') + parser.add_argument('--pad', default="start", help='(history) pad id') + parser.add_argument('--rev', default="", help='(history) revision id') args = parser.parse_args() @@ -448,6 +395,19 @@ if __name__ == "__main__": if args.output: out.close() + elif cmd == "revisions": + print (padserver.getRevisionsCount(args.pad)) + + elif cmd == "authors": + print (padserver.listAuthorsOfPad(args.pad)) + + elif cmd == "changeset": + print (padserver.getRevisionChangeset(args.pad, args.rev)) + + elif cmd == "history": + revs = padserver.getRevisionsCount(args.pad) + data = padserver.createDiffHTML(args.pad, 1, revs) + print (data['html']) else: print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr) diff --git a/listAllPads.py b/listAllPads.py index 5fbfe36..f5bac7d 100755 --- a/listAllPads.py +++ b/listAllPads.py @@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError p = ArgumentParser("") p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json") p.add_argument("--showurl", default=False, action="store_true") +p.add_argument("--list", default=False, action="store_true", help="display one per line") args = p.parse_args() with open(args.padinfo) as f: @@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data) if args.showurl: print requesturl else: - print json.dumps(json.load(urlopen(requesturl))['data']['padIDs']) + results = json.load(urlopen(requesturl))['data']['padIDs'] + if args.list: + for r in results: + print r + else: + print json.dumps(results)