Pumping pads as files into publishing frameworks!
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

69 lines
2.1 KiB

#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, sys, os
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from xml.etree import cElementTree as ET
import html5lib
from trim import trim_removed_spans, contents
from linkify import linkify, urlify
# Command-line interface.  No explicit ``prog`` is passed, so argparse derives
# the program name from sys.argv[0] and usage/error messages show the script's
# own name instead of an empty string.
p = ArgumentParser(
    description="Save a pad's HTML to a file, then follow the links found in it.")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
p.add_argument("--path", default="output", help="path to save files, default: output")
p.add_argument("--verbose", default=False, action="store_true",
               help="print revision counts and newly queued links to stderr")
p.add_argument("--limit", type=int, default=None,
               help="stop after saving this many pads, default: no limit")
args = p.parse_args()
# Connection settings for the pad server's HTTP API come from the JSON file
# named by --padinfo.  The keys read by this script are protocol, hostname,
# port, apiurl, apiversion and apikey.
with open(args.padinfo) as padinfo_file:
    info = json.load(padinfo_file)

# Base URL; an API method name and its query string get appended to it.
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)

# Crawl state: the number of pads written so far, the pads already taken off
# the queue, and the queue itself, seeded with the pad given on the command
# line.
count = 0
done = set()
todo = [args.padid]
def _fetch_json(url):
    """Return the decoded JSON body served at *url*.

    The response is closed even if decoding fails, so the connection is
    released as soon as the body has been read.
    """
    response = urlopen(url)
    try:
        return json.load(response)
    finally:
        response.close()


# Crawl loop: take the next pad off the queue, save its HTML under --path and
# queue every link found in it that has not been queued or processed before.
while todo:
    padid = todo.pop(0)
    done.add(padid)

    # Query parameters shared by both API calls made for this pad.
    data = {}
    data['apikey'] = info['apikey']
    data['padID'] = padid.encode("utf-8")

    # Destination file for this pad; echoed so progress is visible on stderr.
    out = "{0}/{1}".format(args.path, urlify(padid))
    print(out, file=sys.stderr)

    # NOTE(review): an error reply from the API is not handled; presumably
    # its 'data' member is then null and the lookups below raise -- confirm
    # against the server's API documentation.
    total_revisions = _fetch_json(
        apiurl + 'getRevisionsCount?' + urlencode(data))['data']['revisions']
    if args.verbose:
        print(u"{0} has {1} total revisions...".format(padid, total_revisions).encode("utf-8"), file=sys.stderr)

    # Request the diff HTML starting from revision 0.
    data['startRev'] = "0"
    html = _fetch_json(apiurl + 'createDiffHTML?' + urlencode(data))['data']['html']

    # Parse, let trim_removed_spans() edit the tree in place, serialise the
    # tree again and hand the markup to linkify(), which returns the
    # (possibly rewritten) HTML together with the links it found.
    t = html5lib.parse(html, namespaceHTMLElements=False)
    trim_removed_spans(t)
    html = ET.tostring(t, method="html")
    html, links = linkify(html)

    for link in links:
        if link in todo or link in done:
            continue
        if args.verbose:
            # Unicode template + explicit encode, like the revision message
            # above: a byte-string template raises UnicodeEncodeError for a
            # non-ASCII unicode link under Python 2.
            print(u" link: {0}".format(link).encode("utf-8"), file=sys.stderr)
        todo.append(link)

    try:
        os.makedirs(args.path)
    except OSError:
        # An already existing directory is expected on every pass after the
        # first; any other failure (e.g. permission denied) must surface.
        if not os.path.isdir(args.path):
            raise

    with open(out, "w") as f:
        f.write(html.encode("utf-8"))

    # --limit caps the number of pads written; None or 0 means unlimited.
    count += 1
    if args.limit and count >= args.limit:
        break