updated dump_html
This commit is contained in:
parent
76cb1b28a1
commit
b87674e050
@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains
|
||||
cp padinfo.sample.json padinfo.json
|
||||
nano padinfo.json
|
||||
|
||||
And then...
|
||||
And then for instance:
|
||||
|
||||
etherdump --padinfo padinfo.json list
|
||||
etherdump --padinfo padinfo.json listpads
|
||||
|
||||
|
||||
listpads
|
||||
|
@ -6,7 +6,7 @@ from urllib import urlencode
|
||||
from urllib2 import urlopen, HTTPError, URLError
|
||||
|
||||
p = ArgumentParser("")
|
||||
p.add_argument("padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--startrev", type=int, default=0, help="starting revision")
|
||||
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
|
||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||
|
36
dump_html.py
36
dump_html.py
@ -2,12 +2,14 @@
|
||||
from __future__ import print_function
|
||||
from argparse import ArgumentParser
|
||||
import json, sys, os
|
||||
from datetime import datetime
|
||||
import html5lib
|
||||
from urllib import urlencode
|
||||
from urllib2 import urlopen, HTTPError, URLError
|
||||
from xml.etree import cElementTree as ET
|
||||
import html5lib
|
||||
from trim import trim_removed_spans, contents
|
||||
from linkify import linkify, urlify
|
||||
import jinja2
|
||||
|
||||
|
||||
p = ArgumentParser("")
|
||||
@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi
|
||||
p.add_argument("--path", default="output", help="path to save files, default: output")
|
||||
p.add_argument("--verbose", default=False, action="store_true")
|
||||
p.add_argument("--limit", type=int, default=None)
|
||||
p.add_argument("--templates", default="templates")
|
||||
p.add_argument("--template", default="pad_html.html")
|
||||
args = p.parse_args()
|
||||
|
||||
def get_template_env (tpath=None):
    """Build a jinja2 Environment that loads templates from *tpath*.

    tpath is used as the single template search directory only when it is
    a non-empty value naming an existing directory; otherwise the loader
    is created with an empty search path.

    Returns the configured jinja2.Environment.
    """
    if tpath and os.path.isdir(tpath):
        search_dirs = [tpath]
    else:
        search_dirs = []
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_dirs))
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
info = json.load(f)
|
||||
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
@ -26,6 +39,9 @@ todo = [args.padid]
|
||||
done = set()
|
||||
count = 0
|
||||
|
||||
env = get_template_env(args.templates)
|
||||
template = env.get_template(args.template)
|
||||
|
||||
while len(todo) > 0:
|
||||
padid = todo[0]
|
||||
todo = todo[1:]
|
||||
@ -62,7 +78,23 @@ while len(todo) > 0:
|
||||
except OSError:
|
||||
pass
|
||||
with open(out, "w") as f:
|
||||
f.write(html.encode("utf-8"))
|
||||
t = html5lib.parse(html, namespaceHTMLElements=False)
|
||||
style = t.find(".//style")
|
||||
if style != None:
|
||||
style = ET.tostring(style, method="html")
|
||||
else:
|
||||
style = ""
|
||||
body = t.find(".//body")
|
||||
html = contents(body)
|
||||
|
||||
# f.write(html.encode("utf-8"))
|
||||
f.write(template.render(
|
||||
html = html,
|
||||
style = style,
|
||||
revision = total_revisions,
|
||||
padid = padid,
|
||||
timestamp = datetime.now()
|
||||
).encode("utf-8"))
|
||||
|
||||
count += 1
|
||||
if args.limit and count >= args.limit:
|
||||
|
74
etherdump
74
etherdump
@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET
|
||||
from urllib import urlencode
|
||||
from urlparse import urljoin
|
||||
from datetime import datetime
|
||||
from padserver import PadServer
|
||||
|
||||
|
||||
PADINFO_DEFAULTS = {
|
||||
"hostname": "",
|
||||
@ -34,63 +36,6 @@ def content(tag):
|
||||
else:
|
||||
return tag.text + u''.join(ET.tostring(e) for e in tag)
|
||||
|
||||
class PadServer (object):
    """Small client for an Etherpad-style HTTP JSON API.

    The API base URL is assembled once in the constructor as
    ``<protocol>://<hostname>[:<port>]<apipath><apiversion>/``; each API
    method appends a function name plus url-encoded parameters (always
    including ``apikey``) and returns part of the ``data`` member of the
    JSON response.
    """

    def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False):
        """Store connection settings and precompute the API base URL.

        hostname   -- server host name (also used by getPadURL)
        port       -- port appended to the API URL; a falsy value omits it
        apipath    -- path prefix of the API
        apiversion -- version string appended after apipath
        apikey     -- key sent with every request
        secure     -- use "https" instead of "http" when true
        """
        self.hostname = hostname
        self.protocol = "https" if secure else "http"

        self.apiurl = self.protocol+"://"+hostname
        if port:
            self.apiurl += ":{0}".format(port)
        self.apiurl += "{0}{1}/".format(apipath, apiversion)
        self.apikey = apikey

    def _call (self, function, **params):
        """Request API *function* with *params* and return the response's 'data' member.

        The HTTP response is always closed, including when reading or
        JSON decoding fails. Errors raised by urlopen / json.load propagate.
        """
        # Local import so this class does not depend on the file's import block.
        # closing() is used because Python 2 urlopen() results are not
        # context managers themselves.
        from contextlib import closing

        params['apikey'] = self.apikey
        url = self.apiurl+function+'?'+urlencode(params)
        with closing(urlopen(url)) as response:
            return json.load(response)['data']

    def listAllPads (self):
        """Return the 'padIDs' list reported by the listAllPads API call."""
        return self._call('listAllPads')['padIDs']

    def listAllGroups (self):
        """Return the 'groupIDs' list reported by the listAllGroups API call."""
        return self._call('listAllGroups')['groupIDs']

    def getPadText (self, padID):
        """Return the 'text' of pad *padID* (sent UTF-8 encoded) via getText."""
        return self._call('getText', padID=padID.encode("utf-8"))['text']

    def getPadHTML (self, padID):
        """Return the 'html' of pad *padID* (sent UTF-8 encoded) via getHTML."""
        return self._call('getHTML', padID=padID.encode("utf-8"))['html']

    def getPadLastEdited (self, padID):
        """Return the pad's 'lastEdited' value as a local datetime, or None.

        The raw value is divided by 1000 before conversion (presumably it
        is a millisecond timestamp -- confirm against the API docs).
        None is returned when the value cannot be converted.
        """
        raw = self._call('getLastEdited', padID=padID.encode("utf-8"))['lastEdited']
        try:
            # '//' keeps the whole-second result of the original Python 2
            # integer division regardless of interpreter version.
            return datetime.fromtimestamp(int(raw)//1000)
        except (TypeError, ValueError):
            # TypeError: raw is None; ValueError: non-numeric or out-of-range value.
            return None

    def getPadURL (self, padID, groupinfo=None):
        """Return a browser URL for *padID*.

        The pad id is split with pad_split_group(). Pads without a group
        map to ``<protocol>://<hostname>/public_pad/<padID>``. Group pads
        map to ``<protocol>://<hostname>/group.html/<groupID>/pad.html/<padID>``
        where groupID is the "id" entry found in *groupinfo* under the
        group name (leading "g." stripped), or 0 when it is not available.
        Note that the port is not part of these URLs.
        """
        group, name = pad_split_group(padID)
        if not group:
            return self.protocol+"://"+self.hostname+"/public_pad/"+padID

        gid = group[2:] if group.startswith("g.") else group
        ginfo = groupinfo.get(gid) if groupinfo else None
        groupID = ginfo.get("id", 0) if ginfo else 0
        return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
|
||||
|
||||
def get_template_env (tpath=None):
|
||||
import jinja2
|
||||
@ -291,6 +236,8 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
|
||||
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
|
||||
|
||||
parser.add_argument('--pad', default="start", help='(history) pad id')
|
||||
parser.add_argument('--rev', default="", help='(history) revision id')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -448,6 +395,19 @@ if __name__ == "__main__":
|
||||
if args.output:
|
||||
out.close()
|
||||
|
||||
elif cmd == "revisions":
|
||||
print (padserver.getRevisionsCount(args.pad))
|
||||
|
||||
elif cmd == "authors":
|
||||
print (padserver.listAuthorsOfPad(args.pad))
|
||||
|
||||
elif cmd == "changeset":
|
||||
print (padserver.getRevisionChangeset(args.pad, args.rev))
|
||||
|
||||
elif cmd == "history":
|
||||
revs = padserver.getRevisionsCount(args.pad)
|
||||
data = padserver.createDiffHTML(args.pad, 1, revs)
|
||||
print (data['html'])
|
||||
|
||||
else:
|
||||
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)
|
||||
|
@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError
|
||||
p = ArgumentParser("")
|
||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--list", default=False, action="store_true", help="display one per line")
|
||||
args = p.parse_args()
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data)
|
||||
if args.showurl:
|
||||
print requesturl
|
||||
else:
|
||||
print json.dumps(json.load(urlopen(requesturl))['data']['padIDs'])
|
||||
results = json.load(urlopen(requesturl))['data']['padIDs']
|
||||
if args.list:
|
||||
for r in results:
|
||||
print r
|
||||
else:
|
||||
print json.dumps(results)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user