|
|
@ -36,6 +36,11 @@ def main (args): |
|
|
|
p.add_argument("--pub", default="pub", help="pub path for output, default: pub") |
|
|
|
p.add_argument("--group", default="g", help="group path for output, default: g") |
|
|
|
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") |
|
|
|
p.add_argument("--meta", default=False, action="store_true", help="download meta to file, default: False") |
|
|
|
p.add_argument("--text", default=False, action="store_true", help="download text to file, default: False") |
|
|
|
p.add_argument("--html", default=False, action="store_true", help="download html to file, default: False") |
|
|
|
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to file, default: False") |
|
|
|
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") |
|
|
|
args = p.parse_args(args) |
|
|
|
|
|
|
|
info = load_padinfo(args.padinfo) |
|
|
@ -48,7 +53,7 @@ def main (args): |
|
|
|
padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs'] |
|
|
|
padids.sort() |
|
|
|
numpads = len(padids) |
|
|
|
maxmsglen = 0 |
|
|
|
# maxmsglen = 0 |
|
|
|
count = 0 |
|
|
|
for i, padid in enumerate(padids): |
|
|
|
if args.skip != None and i<args.skip: |
|
|
@ -58,46 +63,88 @@ def main (args): |
|
|
|
bars = int(ceil(p*20)) |
|
|
|
bar = ("*"*bars) + ("-"*(20-bars)) |
|
|
|
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid) |
|
|
|
if len(msg) > maxmsglen: |
|
|
|
maxmsglen = len(msg) |
|
|
|
sys.stderr.write("\r{0}".format(" "*maxmsglen)) |
|
|
|
# if len(msg) > maxmsglen: |
|
|
|
# maxmsglen = len(msg) |
|
|
|
# sys.stderr.write("\r{0}".format(" "*maxmsglen)) |
|
|
|
sys.stderr.write(msg.encode("utf-8")) |
|
|
|
sys.stderr.flush() |
|
|
|
|
|
|
|
data['padID'] = padid.encode("utf-8") |
|
|
|
p = padpath(padid, args.pub, args.group) |
|
|
|
metapath = p + ".meta.json" |
|
|
|
revisions = None |
|
|
|
if os.path.exists(metapath): |
|
|
|
with open(metapath) as f: |
|
|
|
meta = json.load(f) |
|
|
|
revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] |
|
|
|
if meta['revisions'] == revisions: |
|
|
|
continue |
|
|
|
|
|
|
|
meta = {'padid': padid.encode("utf-8")} |
|
|
|
if revisions == None: |
|
|
|
meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] |
|
|
|
else: |
|
|
|
meta['revisions' ] = revisions |
|
|
|
|
|
|
|
if (meta['revisions'] == 0) and (not args.zerorevs): |
|
|
|
# print("Skipping zero revs", file=sys.stderr) |
|
|
|
tries = 1 |
|
|
|
skip = False |
|
|
|
while True: |
|
|
|
try: |
|
|
|
if os.path.exists(metapath): |
|
|
|
with open(metapath) as f: |
|
|
|
meta = json.load(f) |
|
|
|
revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] |
|
|
|
if meta['revisions'] == revisions: |
|
|
|
skip=True |
|
|
|
break |
|
|
|
|
|
|
|
meta = {'padid': padid.encode("utf-8")} |
|
|
|
if revisions == None: |
|
|
|
meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] |
|
|
|
else: |
|
|
|
meta['revisions' ] = revisions |
|
|
|
|
|
|
|
if (meta['revisions'] == 0) and (not args.zerorevs): |
|
|
|
# print("Skipping zero revs", file=sys.stderr) |
|
|
|
skip=True |
|
|
|
break |
|
|
|
|
|
|
|
# todo: load more metadata! |
|
|
|
meta['pad'], meta['group'] = splitpadname(padid) |
|
|
|
meta['pathbase'] = p |
|
|
|
meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) |
|
|
|
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() |
|
|
|
meta['author_ids'] = jsonload(info['api']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] |
|
|
|
break |
|
|
|
except HTTPError as e: |
|
|
|
tries += 1 |
|
|
|
if tries > 3: |
|
|
|
print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr) |
|
|
|
skip=True |
|
|
|
break |
|
|
|
|
|
|
|
if skip: |
|
|
|
continue |
|
|
|
|
|
|
|
count += 1 |
|
|
|
# todo: load more metadata! |
|
|
|
meta['pad'], meta['group'] = splitpadname(padid) |
|
|
|
meta['pathbase'] = p |
|
|
|
meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) |
|
|
|
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() |
|
|
|
meta['author_ids'] = jsonload(info['api'] +'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] |
|
|
|
|
|
|
|
# save it |
|
|
|
try: |
|
|
|
os.makedirs(os.path.split(metapath)[0]) |
|
|
|
except OSError: |
|
|
|
pass |
|
|
|
with open(metapath, "w") as f: |
|
|
|
json.dump(meta, f) |
|
|
|
|
|
|
|
print("\nWrote {0} files...".format(count), file=sys.stderr) |
|
|
|
|
|
|
|
print (padid.encode("utf-8")) |
|
|
|
|
|
|
|
if args.all or (args.meta or args.text or args.html or args.dhtml): |
|
|
|
try: |
|
|
|
os.makedirs(os.path.split(metapath)[0]) |
|
|
|
except OSError: |
|
|
|
pass |
|
|
|
|
|
|
|
if args.all or args.meta: |
|
|
|
with open(metapath, "w") as f: |
|
|
|
json.dump(meta, f) |
|
|
|
|
|
|
|
# Process text, html, dhtml, all options |
|
|
|
if args.all or args.text: |
|
|
|
text = jsonload(info['api']+'getText?'+urlencode(data)) |
|
|
|
text = text['data']['text'] |
|
|
|
with open(p+".txt", "w") as f: |
|
|
|
f.write(text.encode("utf-8")) |
|
|
|
|
|
|
|
if args.all or args.html: |
|
|
|
html = jsonload(info['api']+'getHTML?'+urlencode(data)) |
|
|
|
html = html['data']['html'] |
|
|
|
with open(p+".html", "w") as f: |
|
|
|
f.write(html.encode("utf-8")) |
|
|
|
|
|
|
|
if args.all or args.dhtml: |
|
|
|
data['startRev'] = "0" |
|
|
|
html = jsonload(info['api']+'createDiffHTML?'+urlencode(data)) |
|
|
|
html = html['data']['html'] |
|
|
|
with open(p+".diff.html", "w") as f: |
|
|
|
f.write(html.encode("utf-8")) |
|
|
|
|
|
|
|
print("\n{0} pad(s) changed".format(count), file=sys.stderr) |
|
|
|