updated dump_html

This commit is contained in:
Michael Murtaugh 2015-07-23 18:34:36 +02:00
parent 76cb1b28a1
commit b87674e050
5 changed files with 61 additions and 63 deletions

View File

@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains
cp padinfo.sample.json padinfo.json cp padinfo.sample.json padinfo.json
nano padinfo.json nano padinfo.json
And then... And then for instance:
etherdump --padinfo padinfo.json list etherdump --padinfo padinfo.json listpads
listpads listpads

View File

@ -6,7 +6,7 @@ from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("") p = ArgumentParser("")
p.add_argument("padid") p.add_argument("padid", help="the padid")
p.add_argument("--startrev", type=int, default=0, help="starting revision") p.add_argument("--startrev", type=int, default=0, help="starting revision")
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last") p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json") p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")

View File

@ -2,12 +2,14 @@
from __future__ import print_function from __future__ import print_function
from argparse import ArgumentParser from argparse import ArgumentParser
import json, sys, os import json, sys, os
from datetime import datetime
import html5lib
from urllib import urlencode from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError from urllib2 import urlopen, HTTPError, URLError
from xml.etree import cElementTree as ET from xml.etree import cElementTree as ET
import html5lib
from trim import trim_removed_spans, contents from trim import trim_removed_spans, contents
from linkify import linkify, urlify from linkify import linkify, urlify
import jinja2
p = ArgumentParser("") p = ArgumentParser("")
@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi
p.add_argument("--path", default="output", help="path to save files, default: output") p.add_argument("--path", default="output", help="path to save files, default: output")
p.add_argument("--verbose", default=False, action="store_true") p.add_argument("--verbose", default=False, action="store_true")
p.add_argument("--limit", type=int, default=None) p.add_argument("--limit", type=int, default=None)
p.add_argument("--templates", default="templates")
p.add_argument("--template", default="pad_html.html")
args = p.parse_args() args = p.parse_args()
def get_template_env (tpath=None):
    """Return a jinja2 Environment whose loader searches tpath.

    tpath is used as the only search path, and only when it is truthy and
    names an existing directory; in every other case the loader is created
    with an empty search path.
    """
    # NOTE: no built-in fallback template directory is added here.
    search_paths = [tpath] if tpath and os.path.isdir(tpath) else []
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_paths))
with open(args.padinfo) as f: with open(args.padinfo) as f:
info = json.load(f) info = json.load(f)
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
@ -26,6 +39,9 @@ todo = [args.padid]
done = set() done = set()
count = 0 count = 0
# Build the template environment from --templates and look up the template
# named by --template once, before the pad loop starts, so the same template
# object is available for every pad that gets written.
env = get_template_env(args.templates)
template = env.get_template(args.template)
while len(todo) > 0: while len(todo) > 0:
padid = todo[0] padid = todo[0]
todo = todo[1:] todo = todo[1:]
@ -62,7 +78,23 @@ while len(todo) > 0:
except OSError: except OSError:
pass pass
with open(out, "w") as f: with open(out, "w") as f:
f.write(html.encode("utf-8")) t = html5lib.parse(html, namespaceHTMLElements=False)
style = t.find(".//style")
if style != None:
style = ET.tostring(style, method="html")
else:
style = ""
body = t.find(".//body")
html = contents(body)
# f.write(html.encode("utf-8"))
f.write(template.render(
html = html,
style = style,
revision = total_revisions,
padid = padid,
timestamp = datetime.now()
).encode("utf-8"))
count += 1 count += 1
if args.limit and count >= args.limit: if args.limit and count >= args.limit:

View File

@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET
from urllib import urlencode from urllib import urlencode
from urlparse import urljoin from urlparse import urljoin
from datetime import datetime from datetime import datetime
from padserver import PadServer
PADINFO_DEFAULTS = { PADINFO_DEFAULTS = {
"hostname": "", "hostname": "",
@ -34,63 +36,6 @@ def content(tag):
else: else:
return tag.text + u''.join(ET.tostring(e) for e in tag) return tag.text + u''.join(ET.tostring(e) for e in tag)
class PadServer (object):
    """Small client for the HTTP API of a single pad server.

    The constructor assembles the API base URL; the remaining methods each
    issue one API request and return the relevant part of the decoded JSON
    response.
    """

    def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False):
        """Store connection settings and build the API base URL.

        hostname   -- server host name (no scheme)
        port       -- port appended to the API URL; a falsy value omits it
        apipath    -- path prefix of the API
        apiversion -- API version segment appended after apipath
        apikey     -- key sent with every request
        secure     -- use https instead of http when true
        """
        self.hostname = hostname
        self.protocol = "https" if secure else "http"
        self.apiurl = self.protocol+"://"+hostname
        if port:
            self.apiurl += ":{0}".format(port)
        self.apiurl += "{0}{1}/".format(apipath, apiversion)
        self.apikey = apikey

    def _call (self, function, **params):
        """Request API `function` with `params` and return the 'data' member
        of the JSON response.

        The api key is always included. The HTTP response is closed as soon
        as it has been decoded, so connections are not left open.
        """
        # Local import: keeps this class self-contained without touching the
        # module's import block.
        from contextlib import closing

        data = {'apikey': self.apikey}
        data.update(params)
        url = self.apiurl+function+'?'+urlencode(data)
        with closing(urlopen(url)) as response:
            return json.load(response)['data']

    def listAllPads (self):
        """Return the 'padIDs' list reported by the listAllPads call."""
        return self._call('listAllPads')['padIDs']

    def listAllGroups (self):
        """Return the 'groupIDs' list reported by the listAllGroups call."""
        return self._call('listAllGroups')['groupIDs']

    def getPadText (self, padID):
        """Return the 'text' of pad padID (getText call)."""
        return self._call('getText', padID=padID.encode("utf-8"))['text']

    def getPadHTML (self, padID):
        """Return the 'html' of pad padID (getHTML call)."""
        return self._call('getHTML', padID=padID.encode("utf-8"))['html']

    def getPadLastEdited (self, padID):
        """Return the last-edited time of pad padID as a datetime.

        The 'lastEdited' value is divided by 1000 before conversion
        (presumably milliseconds since the epoch -- confirm against the API).
        Returns None when the value cannot be converted because of its type
        (for example when it is None).
        """
        raw = self._call('getLastEdited', padID=padID.encode("utf-8"))['lastEdited']
        try:
            return datetime.fromtimestamp(int(raw)/1000)
        except TypeError:
            return None

    def getPadURL (self, padID, groupinfo=None):
        """Return a browser URL for pad padID.

        The URL is built from the protocol and hostname only; the port is
        not included. Pads without a group get a /public_pad/ URL. Group pads
        get a /group.html/<id>/pad.html/ URL, where <id> is looked up in
        groupinfo (a mapping from group id, without any leading "g.", to a
        dict with an "id" entry) and falls back to 0 when it is unknown.

        NOTE(review): relies on pad_split_group, which is defined outside
        this class.
        """
        group, _ = pad_split_group(padID)
        if not group:
            return self.protocol+"://"+self.hostname+"/public_pad/"+padID
        gid = group[2:] if group.startswith("g.") else group
        ginfo = groupinfo.get(gid) if groupinfo else None
        groupID = ginfo.get("id", 0) if ginfo else 0
        return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
def get_template_env (tpath=None): def get_template_env (tpath=None):
import jinja2 import jinja2
@ -291,6 +236,8 @@ if __name__ == "__main__":
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file') parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)') parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
parser.add_argument('--pad', default="start", help='(history) pad id')
parser.add_argument('--rev', default="", help='(history) revision id')
args = parser.parse_args() args = parser.parse_args()
@ -448,6 +395,19 @@ if __name__ == "__main__":
if args.output: if args.output:
out.close() out.close()
elif cmd == "revisions":
print (padserver.getRevisionsCount(args.pad))
elif cmd == "authors":
print (padserver.listAuthorsOfPad(args.pad))
elif cmd == "changeset":
print (padserver.getRevisionChangeset(args.pad, args.rev))
elif cmd == "history":
revs = padserver.getRevisionsCount(args.pad)
data = padserver.createDiffHTML(args.pad, 1, revs)
print (data['html'])
else: else:
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr) print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)

View File

@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("") p = ArgumentParser("")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json") p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--list", default=False, action="store_true", help="display one per line")
args = p.parse_args() args = p.parse_args()
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data)
if args.showurl: if args.showurl:
print requesturl print requesturl
else: else:
print json.dumps(json.load(urlopen(requesturl))['data']['padIDs']) results = json.load(urlopen(requesturl))['data']['padIDs']
if args.list:
for r in results:
print r
else:
print json.dumps(results)