From 568a8f0790180e8f4f35c92bfb2d6aca7d5d26d7 Mon Sep 17 00:00:00 2001
From: Michael Murtaugh
Date: Thu, 3 Dec 2015 14:08:25 +0100
Subject: [PATCH] more retries in pull, join to move into folders

---
 README.md                  |  3 +++
 etherdump/commands/join.py | 18 ++++++++++++++++--
 etherdump/commands/pull.py | 29 ++++++++++++++++++++++++-----
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 6dc88d2..f3c73dd 100644
--- a/README.md
+++ b/README.md
@@ -54,3 +54,6 @@ etherdump pull
 etherdump sync
     push / pull file contents to pad
 
+why
+-------
+Etherdump is useful as a means of dumping the contents of etherpad to files, as a way of opening up the contents of the service to other services / methods / uses / tools / situations. (Files also of course allow for archival tools / methods)
\ No newline at end of file
diff --git a/etherdump/commands/join.py b/etherdump/commands/join.py
index 90a531c..5076568 100644
--- a/etherdump/commands/join.py
+++ b/etherdump/commands/join.py
@@ -25,7 +25,21 @@ def main(args):
     inputs = args.input
     inputs.sort()
+
+    inputs = [x for x in inputs if not os.path.isdir(x)]
+
     def base (x):
         return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x)
 
-    from pprint import pprint
-    pprint(group(inputs, base))
+    #from pprint import pprint
+    #pprint()
+    gg = group(inputs, base)
+    for items in gg:
+        itembase = base(items[0])
+        try:
+            os.mkdir(itembase)
+        except OSError:
+            pass
+        for i in items:
+            newloc = os.path.join(itembase, i)
+            print "'{0}' => '{1}'".format(i, newloc)
+            os.rename(i, newloc)
diff --git a/etherdump/commands/pull.py b/etherdump/commands/pull.py
index 18b3610..55985bc 100644
--- a/etherdump/commands/pull.py
+++ b/etherdump/commands/pull.py
@@ -6,6 +6,8 @@ from datetime import datetime
 from urllib import urlencode
 from urllib2 import HTTPError
 from common import *
+from time import sleep
+
 
 """
 pull(meta):
@@ -118,10 +120,27 @@ def main (args):
                 f.write(html.encode("utf-8"))
 
         if args.all or args.dhtml:
-            data['startRev'] = "0"
-            html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
-            html = html['data']['html']
-            with open(p+".diff.html", "w") as f:
-                f.write(html.encode("utf-8"))
+            tries = 0
+            skip = False
+            while not skip:
+                try:
+                    data['startRev'] = "0"
+                    html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
+                    html = html['data']['html']
+                    with open(p+".diff.html", "w") as f:
+                        f.write(html.encode("utf-8"))
+                    break
+                except HTTPError as e:
+                    print ("HTTPERROR {0}".format(e), file=sys.stderr)
+                    tries += 1
+                    if tries >= 5:
+                        print (" Too many errors, deleting .diff.html and skipping", file=sys.stderr)
+                        try:
+                            os.remove(p+".diff.html")
+                        except OSError:
+                            pass
+                        skip=True
+                    else:
+                        sleep(0.1)
 
     print("\n{0} pad(s) changed".format(count), file=sys.stderr)
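
The join change above groups sibling dump files (pad.txt, pad.html, pad.diff.html, pad.meta.json) by their shared base name and moves each group into a folder named after the pad. The group() helper it calls is not part of this patch, so the following is only a minimal Python 3 sketch of the same idea, with a plain dict standing in for group() and the suffix regex anchored (both assumptions, not the actual etherdump code):

    import os
    import re

    def base(path):
        # anchored variant of the patch's suffix expression:
        # strip one etherdump suffix to recover the pad name
        return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", path)

    def join_into_folders(paths):
        # drop directories, as the patch does with its isdir filter
        paths = [p for p in paths if not os.path.isdir(p)]
        groups = {}
        for p in paths:
            groups.setdefault(base(p), []).append(p)
        for folder, items in sorted(groups.items()):
            try:
                os.mkdir(folder)
            except OSError:
                pass  # folder already exists
            for i in items:
                newloc = os.path.join(folder, i)
                print("'{0}' => '{1}'".format(i, newloc))
                os.rename(i, newloc)

Filtering directories out of the inputs first means a second run over an already-joined dump does not try to move the pad folders into themselves.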
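
The pull change turns the one-shot createDiffHTML export into a bounded retry loop: up to five attempts, a 0.1-second sleep between attempts, and removal of any stale .diff.html before the pad is skipped. The same pattern as a standalone Python 3 sketch, assuming a fetch() callable in place of the getjson/createDiffHTML call; fetch_with_retries and its parameters are hypothetical names, not etherdump API:

    import os
    import sys
    from time import sleep
    from urllib.error import HTTPError  # urllib2.HTTPError in the Python 2 original

    def fetch_with_retries(fetch, outpath, max_tries=5, delay=0.1):
        # call fetch() until it succeeds or max_tries is reached
        tries = 0
        while True:
            try:
                html = fetch()
                with open(outpath, "w") as f:
                    f.write(html)
                return True
            except HTTPError as e:
                print("HTTPERROR {0}".format(e), file=sys.stderr)
                tries += 1
                if tries >= max_tries:
                    print("  Too many errors, deleting {0} and skipping".format(outpath), file=sys.stderr)
                    try:
                        # drop any truncated output left by an earlier run
                        os.remove(outpath)
                    except OSError:
                        pass
                    return False
                sleep(delay)

Returning out of the loop on success plays the role of the patch's break, and the final return False stands in for its skip flag; either way the file is only left on disk when a full export was written.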