From 568a8f0790180e8f4f35c92bfb2d6aca7d5d26d7 Mon Sep 17 00:00:00 2001
From: Michael Murtaugh
Date: Thu, 3 Dec 2015 14:08:25 +0100
Subject: [PATCH] more retries in pull, join to move into folders

---
 README.md                  |  3 +++
 etherdump/commands/join.py | 18 ++++++++++++++++--
 etherdump/commands/pull.py | 29 ++++++++++++++++++++++++-----
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 6dc88d2..f3c73dd 100644
--- a/README.md
+++ b/README.md
@@ -54,3 +54,6 @@ etherdump pull
 etherdump sync
     push / pull file contents to pad
 
+why
+-------
+Etherdump is useful as a means of dumping the contents of etherpad to files, as a way of opening up the contents of the service to other services / methods / uses / tools / situations. (Files also of course allow for archival tools / methods)
\ No newline at end of file
diff --git a/etherdump/commands/join.py b/etherdump/commands/join.py
index 90a531c..5076568 100644
--- a/etherdump/commands/join.py
+++ b/etherdump/commands/join.py
@@ -25,7 +25,21 @@ def main(args):
     inputs = args.input
     inputs.sort()
+
+    inputs = [x for x in inputs if not os.path.isdir(x)]
+
     def base (x):
         return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x)
 
-    from pprint import pprint
-    pprint(group(inputs, base))
+    #from pprint import pprint
+    #pprint()
+    gg = group(inputs, base)
+    for items in gg:
+        itembase = base(items[0])
+        try:
+            os.mkdir(itembase)
+        except OSError:
+            pass
+        for i in items:
+            newloc = os.path.join(itembase, i)
+            print "'{0}' => '{1}'".format(i, newloc)
+            os.rename(i, newloc)
diff --git a/etherdump/commands/pull.py b/etherdump/commands/pull.py
index 18b3610..55985bc 100644
--- a/etherdump/commands/pull.py
+++ b/etherdump/commands/pull.py
@@ -6,6 +6,8 @@ from datetime import datetime
 from urllib import urlencode
 from urllib2 import HTTPError
 from common import *
+from time import sleep
+
 
 """
 pull(meta):
@@ -118,10 +120,27 @@ def main (args):
                 f.write(html.encode("utf-8"))
 
         if args.all or args.dhtml:
-            data['startRev'] = "0"
-            html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
-            html = html['data']['html']
-            with open(p+".diff.html", "w") as f:
-                f.write(html.encode("utf-8"))
+            tries = 0
+            skip = False
+            while not skip:
+                try:
+                    data['startRev'] = "0"
+                    html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
+                    html = html['data']['html']
+                    with open(p+".diff.html", "w") as f:
+                        f.write(html.encode("utf-8"))
+                    break
+                except HTTPError as e:
+                    print ("HTTPERROR {0}".format(e), file=sys.stderr)
+                    tries += 1
+                    if tries >= 5:
+                        print (" Too many errors, deleting .diff.html and skipping", file=sys.stderr)
+                        try:
+                            os.remove(p+".diff.html")
+                        except OSError:
+                            pass
+                        skip=True
+                    else:
+                        sleep(0.1)
 
     print("\n{0} pad(s) changed".format(count), file=sys.stderr)
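
The join change above groups sibling dump files (pad.txt, pad.html, pad.diff.html, pad.meta.json) by their shared base name and moves each group into a folder named after the pad. The group() helper it calls is not part of this patch, so the following is only a minimal Python 3 sketch of the same idea, with a plain dict standing in for group() and the suffix regex anchored (both assumptions, not the actual etherdump code):

    import os
    import re

    def base(path):
        # anchored variant of the patch's suffix expression:
        # strip one etherdump suffix to recover the pad name
        return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", path)

    def join_into_folders(paths):
        # drop directories, as the patch does with its isdir filter
        paths = [p for p in paths if not os.path.isdir(p)]
        groups = {}
        for p in paths:
            groups.setdefault(base(p), []).append(p)
        for folder, items in sorted(groups.items()):
            try:
                os.mkdir(folder)
            except OSError:
                pass  # folder already exists
            for i in items:
                newloc = os.path.join(folder, i)
                print("'{0}' => '{1}'".format(i, newloc))
                os.rename(i, newloc)

Filtering directories out of the inputs first means a second run over an already-joined dump does not try to move the pad folders into themselves.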
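
The pull change turns the one-shot createDiffHTML export into a bounded retry loop: up to five attempts, a 0.1-second sleep between attempts, and removal of any stale .diff.html before the pad is skipped. The same pattern as a standalone Python 3 sketch, assuming a fetch() callable in place of the getjson/createDiffHTML call; fetch_with_retries and its parameters are hypothetical names, not etherdump API:

    import os
    import sys
    from time import sleep
    from urllib.error import HTTPError  # urllib2.HTTPError in the Python 2 original

    def fetch_with_retries(fetch, outpath, max_tries=5, delay=0.1):
        # call fetch() until it succeeds or max_tries is reached
        tries = 0
        while True:
            try:
                html = fetch()
                with open(outpath, "w") as f:
                    f.write(html)
                return True
            except HTTPError as e:
                print("HTTPERROR {0}".format(e), file=sys.stderr)
                tries += 1
                if tries >= max_tries:
                    print("  Too many errors, deleting {0} and skipping".format(outpath), file=sys.stderr)
                    try:
                        # drop any truncated output left by an earlier run
                        os.remove(outpath)
                    except OSError:
                        pass
                    return False
                sleep(delay)

Returning out of the loop on success plays the role of the patch's break, and the final return False stands in for its skip flag; either way the file is only left on disk when a full export was written.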