updated dump_html

This commit is contained in:
Michael Murtaugh 2015-07-23 18:34:36 +02:00
parent 76cb1b28a1
commit b87674e050
5 changed files with 61 additions and 63 deletions

View File

@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains
cp padinfo.sample.json padinfo.json
nano padinfo.json
And then...
And then for instance:
etherdump --padinfo padinfo.json list
etherdump --padinfo padinfo.json listpads
listpads

View File

@ -6,7 +6,7 @@ from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("")
p.add_argument("padid")
p.add_argument("padid", help="the padid")
p.add_argument("--startrev", type=int, default=0, help="starting revision")
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")

View File

@ -2,12 +2,14 @@
from __future__ import print_function
from argparse import ArgumentParser
import json, sys, os
from datetime import datetime
import html5lib
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from xml.etree import cElementTree as ET
import html5lib
from trim import trim_removed_spans, contents
from linkify import linkify, urlify
import jinja2
p = ArgumentParser("")
@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi
p.add_argument("--path", default="output", help="path to save files, default: output")
p.add_argument("--verbose", default=False, action="store_true")
p.add_argument("--limit", type=int, default=None)
p.add_argument("--templates", default="templates")
p.add_argument("--template", default="pad_html.html")
args = p.parse_args()
def get_template_env (tpath=None):
    """Return a jinja2 Environment whose loader searches tpath.

    tpath is used as a template search path only when it is truthy and
    names an existing directory; otherwise the FileSystemLoader is built
    with an empty search path list. No default templates directory is
    added.
    """
    search_paths = [tpath] if tpath and os.path.isdir(tpath) else []
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_paths))
with open(args.padinfo) as f:
info = json.load(f)
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
@ -26,6 +39,9 @@ todo = [args.padid]
done = set()
count = 0
env = get_template_env(args.templates)
template = env.get_template(args.template)
while len(todo) > 0:
padid = todo[0]
todo = todo[1:]
@ -62,7 +78,23 @@ while len(todo) > 0:
except OSError:
pass
with open(out, "w") as f:
f.write(html.encode("utf-8"))
t = html5lib.parse(html, namespaceHTMLElements=False)
style = t.find(".//style")
if style != None:
style = ET.tostring(style, method="html")
else:
style = ""
body = t.find(".//body")
html = contents(body)
# f.write(html.encode("utf-8"))
f.write(template.render(
html = html,
style = style,
revision = total_revisions,
padid = padid,
timestamp = datetime.now()
).encode("utf-8"))
count += 1
if args.limit and count >= args.limit:

View File

@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET
from urllib import urlencode
from urlparse import urljoin
from datetime import datetime
from padserver import PadServer
PADINFO_DEFAULTS = {
"hostname": "",
@ -34,63 +36,6 @@ def content(tag):
else:
return tag.text + u''.join(ET.tostring(e) for e in tag)
class PadServer (object):
    """Small client for a pad server's HTTP API (presumably Etherpad -- confirm).

    The constructor assembles the base API URL from protocol, hostname,
    optional port, API path and API version; each API method then issues a
    GET request against that base URL with the API key as a query parameter
    and returns part of the decoded JSON response.
    """

    def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False):
        """Store connection settings and precompute self.apiurl.

        hostname   -- server host name, used in both API and pad URLs
        port       -- port appended to the API URL; a falsy value omits it
        apipath    -- path prefix of the API
        apiversion -- API version, appended after apipath
        apikey     -- key sent as the 'apikey' parameter of every request
        secure     -- use "https" instead of "http" when true
        """
        self.hostname = hostname
        self.protocol = "https" if secure else "http"
        self.apiurl = self.protocol+"://"+hostname
        if port:
            self.apiurl += ":{0}".format(port)
        self.apiurl += "{0}{1}/".format(apipath, apiversion)
        self.apikey = apikey

    def _call (self, function, **params):
        """GET the API function with params and return the response's 'data' member."""
        # Imported locally so the class does not rely on a new file-level import.
        from contextlib import closing
        data = {'apikey': self.apikey}
        data.update(params)
        url = self.apiurl+function+'?'+urlencode(data)
        # urlopen() results are not context managers under urllib2, so wrap
        # the response in closing() to release the connection once the JSON
        # has been read.
        with closing(urlopen(url)) as response:
            return json.load(response)['data']

    def listAllPads (self):
        """Return the 'padIDs' value reported by the listAllPads API call."""
        return self._call('listAllPads')['padIDs']

    def listAllGroups (self):
        """Return the 'groupIDs' value reported by the listAllGroups API call."""
        return self._call('listAllGroups')['groupIDs']

    def getPadText (self, padID):
        """Return the 'text' value of the getText API call for padID."""
        return self._call('getText', padID=padID.encode("utf-8"))['text']

    def getPadHTML (self, padID):
        """Return the 'html' value of the getHTML API call for padID."""
        return self._call('getHTML', padID=padID.encode("utf-8"))['html']

    def getPadLastEdited (self, padID):
        """Return the pad's last-edited time as a datetime, or None.

        The 'lastEdited' value is divided by 1000 before conversion, i.e. it
        is treated as a millisecond timestamp. None is returned when the
        conversion raises TypeError (for instance when the value is None).
        """
        raw = self._call('getLastEdited', padID=padID.encode("utf-8"))['lastEdited']
        try:
            return datetime.fromtimestamp(int(raw)/1000)
        except TypeError:
            return None

    def getPadURL (self, padID, groupinfo=None):
        """Return the browser URL for padID.

        pad_split_group (defined elsewhere in this file) splits padID into a
        group part and a name part. Pads without a group get a
        "/public_pad/" URL. Group pads get a "/group.html/<id>/pad.html/"
        URL, where <id> is looked up in groupinfo under the group id with
        any leading "g." removed, falling back to 0 when groupinfo, the
        group entry or its "id" key is missing. The URL is built from
        protocol and hostname only; the API port is not included.
        """
        group, _ = pad_split_group(padID)
        if not group:
            return self.protocol+"://"+self.hostname+"/public_pad/"+padID
        gid = group[2:] if group.startswith("g.") else group
        ginfo = groupinfo.get(gid) if groupinfo else None
        groupID = ginfo.get("id", 0) if ginfo else 0
        return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
def get_template_env (tpath=None):
import jinja2
@ -291,6 +236,8 @@ if __name__ == "__main__":
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
parser.add_argument('--pad', default="start", help='(history) pad id')
parser.add_argument('--rev', default="", help='(history) revision id')
args = parser.parse_args()
@ -448,6 +395,19 @@ if __name__ == "__main__":
if args.output:
out.close()
elif cmd == "revisions":
print (padserver.getRevisionsCount(args.pad))
elif cmd == "authors":
print (padserver.listAuthorsOfPad(args.pad))
elif cmd == "changeset":
print (padserver.getRevisionChangeset(args.pad, args.rev))
elif cmd == "history":
revs = padserver.getRevisionsCount(args.pad)
data = padserver.createDiffHTML(args.pad, 1, revs)
print (data['html'])
else:
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)

View File

@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--list", default=False, action="store_true", help="display one per line")
args = p.parse_args()
with open(args.padinfo) as f:
@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data)
if args.showurl:
print requesturl
else:
print json.dumps(json.load(urlopen(requesturl))['data']['padIDs'])
results = json.load(urlopen(requesturl))['data']['padIDs']
if args.list:
for r in results:
print r
else:
print json.dumps(results)