Self-contained sync command with per-output options and an --all flag

This commit is contained in:
Michael Murtaugh 2015-11-19 12:47:03 +01:00
parent a040c3b3e5
commit aa4f478e2f

View File

@ -36,6 +36,11 @@ def main (args):
p.add_argument("--pub", default="pub", help="pub path for output, default: pub") p.add_argument("--pub", default="pub", help="pub path for output, default: pub")
p.add_argument("--group", default="g", help="group path for output, default: g") p.add_argument("--group", default="g", help="group path for output, default: g")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--meta", default=False, action="store_true", help="download meta to file, default: False")
p.add_argument("--text", default=False, action="store_true", help="download text to file, default: False")
p.add_argument("--html", default=False, action="store_true", help="download html to file, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to file, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
args = p.parse_args(args) args = p.parse_args(args)
info = load_padinfo(args.padinfo) info = load_padinfo(args.padinfo)
@ -48,7 +53,7 @@ def main (args):
padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids.sort() padids.sort()
numpads = len(padids) numpads = len(padids)
maxmsglen = 0 # maxmsglen = 0
count = 0 count = 0
for i, padid in enumerate(padids): for i, padid in enumerate(padids):
if args.skip != None and i<args.skip: if args.skip != None and i<args.skip:
@ -58,46 +63,88 @@ def main (args):
bars = int(ceil(p*20)) bars = int(ceil(p*20))
bar = ("*"*bars) + ("-"*(20-bars)) bar = ("*"*bars) + ("-"*(20-bars))
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid) msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
if len(msg) > maxmsglen: # if len(msg) > maxmsglen:
maxmsglen = len(msg) # maxmsglen = len(msg)
sys.stderr.write("\r{0}".format(" "*maxmsglen)) # sys.stderr.write("\r{0}".format(" "*maxmsglen))
sys.stderr.write(msg.encode("utf-8")) sys.stderr.write(msg.encode("utf-8"))
sys.stderr.flush() sys.stderr.flush()
data['padID'] = padid.encode("utf-8") data['padID'] = padid.encode("utf-8")
p = padpath(padid, args.pub, args.group) p = padpath(padid, args.pub, args.group)
metapath = p + ".meta.json" metapath = p + ".meta.json"
revisions = None revisions = None
if os.path.exists(metapath): tries = 1
with open(metapath) as f: skip = False
meta = json.load(f) while True:
revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] try:
if meta['revisions'] == revisions: if os.path.exists(metapath):
continue with open(metapath) as f:
meta = json.load(f)
meta = {'padid': padid.encode("utf-8")} revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
if revisions == None: if meta['revisions'] == revisions:
meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] skip=True
else: break
meta['revisions' ] = revisions
meta = {'padid': padid.encode("utf-8")}
if revisions == None:
meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
else:
meta['revisions' ] = revisions
if (meta['revisions'] == 0) and (not args.zerorevs): if (meta['revisions'] == 0) and (not args.zerorevs):
# print("Skipping zero revs", file=sys.stderr) # print("Skipping zero revs", file=sys.stderr)
skip=True
break
# todo: load more metadata!
meta['pad'], meta['group'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
meta['author_ids'] = jsonload(info['api']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
break
except HTTPError as e:
tries += 1
if tries > 3:
print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
skip=True
break
if skip:
continue continue
count += 1 count += 1
# todo: load more metadata!
meta['pad'], meta['group'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
meta['author_ids'] = jsonload(info['api'] +'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
# save it print (padid.encode("utf-8"))
try:
os.makedirs(os.path.split(metapath)[0])
except OSError:
pass
with open(metapath, "w") as f:
json.dump(meta, f)
print("\nWrote {0} files...".format(count), file=sys.stderr) if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
os.makedirs(os.path.split(metapath)[0])
except OSError:
pass
if args.all or args.meta:
with open(metapath, "w") as f:
json.dump(meta, f)
# Process text, html, dhtml, all options
if args.all or args.text:
text = jsonload(info['api']+'getText?'+urlencode(data))
text = text['data']['text']
with open(p+".txt", "w") as f:
f.write(text.encode("utf-8"))
if args.all or args.html:
html = jsonload(info['api']+'getHTML?'+urlencode(data))
html = html['data']['html']
with open(p+".html", "w") as f:
f.write(html.encode("utf-8"))
if args.all or args.dhtml:
data['startRev'] = "0"
html = jsonload(info['api']+'createDiffHTML?'+urlencode(data))
html = html['data']['html']
with open(p+".diff.html", "w") as f:
f.write(html.encode("utf-8"))
print("\n{0} pad(s) changed".format(count), file=sys.stderr)