updated dump_html
This commit is contained in:
parent
76cb1b28a1
commit
b87674e050
@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains
|
|||||||
cp padinfo.sample.json padinfo.json
|
cp padinfo.sample.json padinfo.json
|
||||||
nano padinfo.json
|
nano padinfo.json
|
||||||
|
|
||||||
And then...
|
And then for instance:
|
||||||
|
|
||||||
etherdump --padinfo padinfo.json list
|
etherdump --padinfo padinfo.json listpads
|
||||||
|
|
||||||
|
|
||||||
listpads
|
listpads
|
||||||
|
@ -6,7 +6,7 @@ from urllib import urlencode
|
|||||||
from urllib2 import urlopen, HTTPError, URLError
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
|
|
||||||
p = ArgumentParser("")
|
p = ArgumentParser("")
|
||||||
p.add_argument("padid")
|
p.add_argument("padid", help="the padid")
|
||||||
p.add_argument("--startrev", type=int, default=0, help="starting revision")
|
p.add_argument("--startrev", type=int, default=0, help="starting revision")
|
||||||
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
|
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
|
||||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||||
|
36
dump_html.py
36
dump_html.py
@ -2,12 +2,14 @@
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import json, sys, os
|
import json, sys, os
|
||||||
|
from datetime import datetime
|
||||||
|
import html5lib
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urllib2 import urlopen, HTTPError, URLError
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
from xml.etree import cElementTree as ET
|
from xml.etree import cElementTree as ET
|
||||||
import html5lib
|
|
||||||
from trim import trim_removed_spans, contents
|
from trim import trim_removed_spans, contents
|
||||||
from linkify import linkify, urlify
|
from linkify import linkify, urlify
|
||||||
|
import jinja2
|
||||||
|
|
||||||
|
|
||||||
p = ArgumentParser("")
|
p = ArgumentParser("")
|
||||||
@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi
|
|||||||
p.add_argument("--path", default="output", help="path to save files, default: output")
|
p.add_argument("--path", default="output", help="path to save files, default: output")
|
||||||
p.add_argument("--verbose", default=False, action="store_true")
|
p.add_argument("--verbose", default=False, action="store_true")
|
||||||
p.add_argument("--limit", type=int, default=None)
|
p.add_argument("--limit", type=int, default=None)
|
||||||
|
p.add_argument("--templates", default="templates")
|
||||||
|
p.add_argument("--template", default="pad_html.html")
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
|
def get_template_env (tpath=None):
    """
    Build a jinja2 Environment that loads templates from the filesystem.

    tpath: optional template directory. It is put on the loader's search
    path only when it is non-empty and names an existing directory;
    otherwise the loader is created with an empty search path list.

    Returns the jinja2.Environment wrapping that FileSystemLoader.
    """
    search_paths = [tpath] if tpath and os.path.isdir(tpath) else []
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_paths))
|
||||||
|
|
||||||
with open(args.padinfo) as f:
|
with open(args.padinfo) as f:
|
||||||
info = json.load(f)
|
info = json.load(f)
|
||||||
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||||
@ -26,6 +39,9 @@ todo = [args.padid]
|
|||||||
done = set()
|
done = set()
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
|
env = get_template_env(args.templates)
|
||||||
|
template = env.get_template(args.template)
|
||||||
|
|
||||||
while len(todo) > 0:
|
while len(todo) > 0:
|
||||||
padid = todo[0]
|
padid = todo[0]
|
||||||
todo = todo[1:]
|
todo = todo[1:]
|
||||||
@ -62,7 +78,23 @@ while len(todo) > 0:
|
|||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
with open(out, "w") as f:
|
with open(out, "w") as f:
|
||||||
f.write(html.encode("utf-8"))
|
t = html5lib.parse(html, namespaceHTMLElements=False)
|
||||||
|
style = t.find(".//style")
|
||||||
|
if style != None:
|
||||||
|
style = ET.tostring(style, method="html")
|
||||||
|
else:
|
||||||
|
style = ""
|
||||||
|
body = t.find(".//body")
|
||||||
|
html = contents(body)
|
||||||
|
|
||||||
|
# f.write(html.encode("utf-8"))
|
||||||
|
f.write(template.render(
|
||||||
|
html = html,
|
||||||
|
style = style,
|
||||||
|
revision = total_revisions,
|
||||||
|
padid = padid,
|
||||||
|
timestamp = datetime.now()
|
||||||
|
).encode("utf-8"))
|
||||||
|
|
||||||
count += 1
|
count += 1
|
||||||
if args.limit and count >= args.limit:
|
if args.limit and count >= args.limit:
|
||||||
|
74
etherdump
74
etherdump
@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET
|
|||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urlparse import urljoin
|
from urlparse import urljoin
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from padserver import PadServer
|
||||||
|
|
||||||
|
|
||||||
PADINFO_DEFAULTS = {
|
PADINFO_DEFAULTS = {
|
||||||
"hostname": "",
|
"hostname": "",
|
||||||
@ -34,63 +36,6 @@ def content(tag):
|
|||||||
else:
|
else:
|
||||||
return tag.text + u''.join(ET.tostring(e) for e in tag)
|
return tag.text + u''.join(ET.tostring(e) for e in tag)
|
||||||
|
|
||||||
class PadServer (object):
    """
    Small client for a pad server's HTTP API.

    The constructor assembles the API base URL; every query method issues a
    single request carrying the API key and returns one field of the 'data'
    member of the JSON reply.
    """

    def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False):
        """
        hostname: server host name, without protocol
        port: port appended to the API URL; a falsy value leaves it out
        apipath: path prefix of the API
        apiversion: version segment appended after apipath
        apikey: value sent as the 'apikey' parameter of every request
        secure: select "https" when true, "http" otherwise
        """
        self.hostname = hostname
        self.protocol = "https" if secure else "http"
        self.apiurl = self.protocol+"://"+hostname
        if port:
            self.apiurl += ":{0}".format(port)
        self.apiurl += "{0}{1}/".format(apipath, apiversion)
        self.apikey = apikey

    def _call (self, function, **params):
        """
        Request apiurl + function with the API key and params as the query
        string; return the 'data' member of the decoded JSON reply.
        """
        # Local import keeps this class self-contained.
        from contextlib import closing
        query = {'apikey': self.apikey}
        query.update(params)
        url = self.apiurl+function+'?'+urlencode(query)
        # closing() releases the HTTP response even when decoding fails;
        # previously the response object was never closed.
        with closing(urlopen(url)) as response:
            return json.load(response)['data']

    def listAllPads (self):
        """ Return the 'padIDs' field of the listAllPads reply. """
        return self._call('listAllPads')['padIDs']

    def listAllGroups (self):
        """ Return the 'groupIDs' field of the listAllGroups reply. """
        return self._call('listAllGroups')['groupIDs']

    def getPadText (self, padID):
        """ Return the 'text' field of the getText reply for padID. """
        return self._call('getText', padID=padID.encode("utf-8"))['text']

    def getPadHTML (self, padID):
        """ Return the 'html' field of the getHTML reply for padID. """
        return self._call('getHTML', padID=padID.encode("utf-8"))['html']

    def getPadLastEdited (self, padID):
        """
        Return the 'lastEdited' field of the getLastEdited reply as a
        datetime, or None when converting the value raises TypeError.
        """
        raw = self._call('getLastEdited', padID=padID.encode("utf-8"))['lastEdited']
        try:
            # presumably milliseconds since the epoch, hence the division -- TODO confirm
            return datetime.fromtimestamp(int(raw)/1000)
        except TypeError:
            return None

    def getPadURL (self, padID, groupinfo=None):
        """
        Return the browser URL of a pad.

        padID: pad identifier, split with pad_split_group into group and name
        groupinfo: optional mapping from group id (without a leading "g.")
            to a dict whose "id" entry is used in the group URL; 0 is used
            when no such entry is available

        Pads without a group get a /public_pad/ URL, grouped pads a
        /group.html/<id>/pad.html/ URL.
        """
        # NOTE(review): unlike apiurl, these URLs do not include the port -- confirm intended.
        group, _name = pad_split_group(padID)
        if not group:
            return self.protocol+"://"+self.hostname+"/public_pad/"+padID
        gid = group[2:] if group.startswith("g.") else group
        ginfo = groupinfo.get(gid) if groupinfo else None
        groupID = ginfo.get("id", 0) if ginfo else 0
        return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
|
|
||||||
|
|
||||||
def get_template_env (tpath=None):
|
def get_template_env (tpath=None):
|
||||||
import jinja2
|
import jinja2
|
||||||
@ -291,6 +236,8 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
|
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
|
||||||
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
|
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
|
||||||
|
|
||||||
|
parser.add_argument('--pad', default="start", help='(history) pad id')
|
||||||
|
parser.add_argument('--rev', default="", help='(history) revision id')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@ -448,6 +395,19 @@ if __name__ == "__main__":
|
|||||||
if args.output:
|
if args.output:
|
||||||
out.close()
|
out.close()
|
||||||
|
|
||||||
|
elif cmd == "revisions":
|
||||||
|
print (padserver.getRevisionsCount(args.pad))
|
||||||
|
|
||||||
|
elif cmd == "authors":
|
||||||
|
print (padserver.listAuthorsOfPad(args.pad))
|
||||||
|
|
||||||
|
elif cmd == "changeset":
|
||||||
|
print (padserver.getRevisionChangeset(args.pad, args.rev))
|
||||||
|
|
||||||
|
elif cmd == "history":
|
||||||
|
revs = padserver.getRevisionsCount(args.pad)
|
||||||
|
data = padserver.createDiffHTML(args.pad, 1, revs)
|
||||||
|
print (data['html'])
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)
|
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)
|
||||||
|
@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError
|
|||||||
p = ArgumentParser("")
|
p = ArgumentParser("")
|
||||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||||
p.add_argument("--showurl", default=False, action="store_true")
|
p.add_argument("--showurl", default=False, action="store_true")
|
||||||
|
p.add_argument("--list", default=False, action="store_true", help="display one per line")
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
with open(args.padinfo) as f:
|
with open(args.padinfo) as f:
|
||||||
@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data)
|
|||||||
if args.showurl:
|
if args.showurl:
|
||||||
print requesturl
|
print requesturl
|
||||||
else:
|
else:
|
||||||
print json.dumps(json.load(urlopen(requesturl))['data']['padIDs'])
|
results = json.load(urlopen(requesturl))['data']['padIDs']
|
||||||
|
if args.list:
|
||||||
|
for r in results:
|
||||||
|
print r
|
||||||
|
else:
|
||||||
|
print json.dumps(results)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user