Browse Source

updated dump_html

add-quote-import
Michael Murtaugh 9 years ago
parent
commit
b87674e050
  1. 4
      README.md
  2. 2
      createDiffHTML.py
  3. 36
      dump_html.py
  4. 74
      etherdump
  5. 8
      listAllPads.py

4
README.md

@ -24,9 +24,9 @@ The easiest way to use etherdump is to create a padinfo JSON file that contains
cp padinfo.sample.json padinfo.json
nano padinfo.json
And then...
And then for instance:
etherdump --padinfo padinfo.json list
etherdump --padinfo padinfo.json listpads
listpads

2
createDiffHTML.py

@ -6,7 +6,7 @@ from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("")
p.add_argument("padid")
p.add_argument("padid", help="the padid")
p.add_argument("--startrev", type=int, default=0, help="starting revision")
p.add_argument("--endrev", type=int, default=None, help="ending revision, default: last")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")

36
dump_html.py

@ -2,12 +2,14 @@
from __future__ import print_function
from argparse import ArgumentParser
import json, sys, os
from datetime import datetime
import html5lib
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from xml.etree import cElementTree as ET
import html5lib
from trim import trim_removed_spans, contents
from linkify import linkify, urlify
import jinja2
p = ArgumentParser("")
@ -16,8 +18,19 @@ p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padi
p.add_argument("--path", default="output", help="path to save files, default: output")
p.add_argument("--verbose", default=False, action="store_true")
p.add_argument("--limit", type=int, default=None)
p.add_argument("--templates", default="templates")
p.add_argument("--template", default="pad_html.html")
args = p.parse_args()
def get_template_env (tpath=None):
    """Return a jinja2 Environment that loads templates from the filesystem.

    tpath -- optional template directory. It is only added to the loader's
             search path when it is non-empty and names an existing
             directory; otherwise the loader is created with an empty
             search path.
    """
    search_dirs = [tpath] if tpath and os.path.isdir(tpath) else []
    # NOTE: a built-in fallback directory (TEMPLATES_PATH) is deliberately
    # not appended to the search path here.
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_dirs))
with open(args.padinfo) as f:
info = json.load(f)
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
@ -26,6 +39,9 @@ todo = [args.padid]
done = set()
count = 0
env = get_template_env(args.templates)
template = env.get_template(args.template)
while len(todo) > 0:
padid = todo[0]
todo = todo[1:]
@ -62,7 +78,23 @@ while len(todo) > 0:
except OSError:
pass
with open(out, "w") as f:
f.write(html.encode("utf-8"))
t = html5lib.parse(html, namespaceHTMLElements=False)
style = t.find(".//style")
if style != None:
style = ET.tostring(style, method="html")
else:
style = ""
body = t.find(".//body")
html = contents(body)
# f.write(html.encode("utf-8"))
f.write(template.render(
html = html,
style = style,
revision = total_revisions,
padid = padid,
timestamp = datetime.now()
).encode("utf-8"))
count += 1
if args.limit and count >= args.limit:

74
etherdump

@ -8,6 +8,8 @@ from xml.etree import ElementTree as ET
from urllib import urlencode
from urlparse import urljoin
from datetime import datetime
from padserver import PadServer
PADINFO_DEFAULTS = {
"hostname": "",
@ -34,63 +36,6 @@ def content(tag):
else:
return tag.text + u''.join(ET.tostring(e) for e in tag)
class PadServer (object):
    """Small client for a pad server's HTTP JSON API.

    Every API method issues a GET request to ``<apiurl><function>?<query>``
    and reads the ``data`` member of the JSON response. The function names
    (listAllPads, getText, ...) look like the Etherpad HTTP API -- confirm
    against the server actually in use.
    """

    def __init__ (self, hostname, port=9001, apipath="/api/", apiversion="1.2.9", apikey=None, secure=False):
        """Remember the connection settings and precompute the API base URL.

        hostname   -- server host name (also used by getPadURL)
        port       -- TCP port; a falsy value leaves the port out of the URL
        apipath    -- path prefix of the API, with leading and trailing slash
        apiversion -- API version string appended after apipath
        apikey     -- key sent as the ``apikey`` parameter of every request
        secure     -- use https instead of http when true
        """
        self.hostname = hostname
        self.protocol = "https" if secure else "http"
        self.apiurl = self.protocol+"://"+hostname
        if port:
            self.apiurl += ":{0}".format(port)
        self.apiurl += "{0}{1}/".format(apipath, apiversion)
        self.apikey = apikey

    def _call (self, function, **params):
        """Call API *function* with *params* and return the response's 'data'.

        The apikey is added to the parameters automatically. The HTTP
        response is always closed, even when decoding the JSON fails.
        """
        params['apikey'] = self.apikey
        response = urlopen(self.apiurl+function+'?'+urlencode(params))
        try:
            return json.load(response)['data']
        finally:
            response.close()

    def listAllPads (self):
        """Return the value of 'padIDs' from the listAllPads API call."""
        return self._call('listAllPads')['padIDs']

    def listAllGroups (self):
        """Return the value of 'groupIDs' from the listAllGroups API call."""
        return self._call('listAllGroups')['groupIDs']

    def getPadText (self, padID):
        """Return the 'text' value of the getText API call for *padID*."""
        return self._call('getText', padID=padID.encode("utf-8"))['text']

    def getPadHTML (self, padID):
        """Return the 'html' value of the getHTML API call for *padID*."""
        return self._call('getHTML', padID=padID.encode("utf-8"))['html']

    def getPadLastEdited (self, padID):
        """Return the pad's last-edited time as a datetime, or None.

        The server's 'lastEdited' value is divided by 1000 before being
        passed to datetime.fromtimestamp (presumably it is in
        milliseconds -- confirm against the API). None is returned when
        the conversion raises TypeError.
        """
        raw = self._call('getLastEdited', padID=padID.encode("utf-8"))['lastEdited']
        try:
            return datetime.fromtimestamp(int(raw)/1000)
        except TypeError:
            return None

    def getPadURL (self, padID, groupinfo=None):
        """Return the browser URL of the pad *padID*.

        padID     -- pad id; split into (group, name) by pad_split_group
        groupinfo -- optional dict mapping a group id (without its "g."
                     prefix) to a dict whose "id" entry is used in the URL

        Pads without a group get a /public_pad/ URL. Grouped pads get a
        /group.html/<id>/pad.html/ URL, where <id> falls back to 0 when
        the group is not described in groupinfo. The URL is built from
        protocol and hostname only; the API port is not included.
        """
        group, name = pad_split_group(padID)
        if not group:
            return self.protocol+"://"+self.hostname+"/public_pad/"+padID
        gid = group
        if gid.startswith("g."):
            gid = gid[2:]
        ginfo = groupinfo.get(gid) if groupinfo else None
        groupID = ginfo.get("id", 0) if ginfo else 0
        return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
def get_template_env (tpath=None):
import jinja2
@ -291,6 +236,8 @@ if __name__ == "__main__":
parser.add_argument('--groupinfo', default=None, help='(index) groupinfo json file')
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
parser.add_argument('--pad', default="start", help='(history) pad id')
parser.add_argument('--rev', default="", help='(history) revision id')
args = parser.parse_args()
@ -448,6 +395,19 @@ if __name__ == "__main__":
if args.output:
out.close()
elif cmd == "revisions":
print (padserver.getRevisionsCount(args.pad))
elif cmd == "authors":
print (padserver.listAuthorsOfPad(args.pad))
elif cmd == "changeset":
print (padserver.getRevisionChangeset(args.pad, args.rev))
elif cmd == "history":
revs = padserver.getRevisionsCount(args.pad)
data = padserver.createDiffHTML(args.pad, 1, revs)
print (data['html'])
else:
print ("Command '{0}' not understood, try: listpads, listgroups, dump".format(args.command), file=sys.stderr)

8
listAllPads.py

@ -8,6 +8,7 @@ from urllib2 import urlopen, HTTPError, URLError
p = ArgumentParser("")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--list", default=False, action="store_true", help="display one per line")
args = p.parse_args()
with open(args.padinfo) as f:
@ -19,5 +20,10 @@ requesturl = apiurl+'listAllPads?'+urlencode(data)
if args.showurl:
print requesturl
else:
print json.dumps(json.load(urlopen(requesturl))['data']['padIDs'])
results = json.load(urlopen(requesturl))['data']['padIDs']
if args.list:
for r in results:
print r
else:
print json.dumps(results)

Loading…
Cancel
Save