diff --git a/README.md b/README.md
index f3c73dd..8f04e1a 100644
--- a/README.md
+++ b/README.md
@@ -56,4 +56,5 @@ etherdump sync
why
-------
-Etherdump is useful as a means of dumping the contents of etherpad to files, as a way of opening up the contents of the service to other services / methods / uses / tools / situations. (Files also of course allow for archival tools / methods)
\ No newline at end of file
+Etherdump is useful as a means of dumping the contents of etherpad to files, as a way of opening up the contents of the service to other services / methods / uses / tools / situations. (Files also of course allow for archival tools / methods)
+
diff --git a/etherdump/commands/common.py b/etherdump/commands/common.py
index 819730d..7e08b8f 100644
--- a/etherdump/commands/common.py
+++ b/etherdump/commands/common.py
@@ -1,7 +1,10 @@
+from __future__ import print_function
import re, os, json, sys
from urllib import quote_plus, unquote_plus
from math import ceil, floor
-from urllib2 import urlopen
+from urllib2 import urlopen, HTTPError
+from time import sleep
+
groupnamepat = re.compile(r"^g\.(\w+)\$")
def splitpadname (padid):
@@ -39,11 +42,27 @@ def padpath2id (path):
else:
return p.decode("utf-8")
-def getjson (url):
- f = urlopen(url)
- data = f.read()
- f.close()
- return json.loads(data)
+def getjson (url, max_retry=3, retry_sleep_time=0.5):
+    """Fetch url and return its decoded JSON object as a dict, retrying on HTTPError.
+
+    The returned dict holds the keys of the JSON response plus bookkeeping keys:
+      "_retries": number of attempts that failed with an HTTPError
+      "_code":    HTTP status code of the last attempt (success or failure)
+      "_url":     final URL, only set when it differs from the requested url
+
+    Up to max_retry + 1 attempts are made. When every attempt fails, the dict
+    contains only the bookkeeping keys. Only HTTPError is retried; any other
+    exception (e.g. a ValueError from invalid JSON) propagates to the caller.
+    retry_sleep_time is the pause in seconds between attempts (falsy = no pause).
+    """
+    ret = {"_retries": 0}
+    while ret["_retries"] <= max_retry:
+        try:
+            f = urlopen(url)
+            try:
+                data = f.read()
+                rurl = f.geturl()
+                code = f.getcode()
+            finally:
+                # always release the connection, even when read() fails
+                f.close()
+        except HTTPError as e:
+            print ("HTTPError {0}".format(e), file=sys.stderr)
+            ret["_code"] = e.code
+            ret["_retries"] += 1
+            # only pause when another attempt will actually follow
+            if retry_sleep_time and ret["_retries"] <= max_retry:
+                sleep(retry_sleep_time)
+            continue
+        ret.update(json.loads(data))
+        ret["_code"] = code
+        if rurl != url:
+            ret["_url"] = rurl
+        return ret
+    return ret
def loadpadinfo(p):
with open(p) as f:
diff --git a/etherdump/commands/index.py b/etherdump/commands/index.py
index 4d5af65..f2490e4 100644
--- a/etherdump/commands/index.py
+++ b/etherdump/commands/index.py
@@ -6,6 +6,8 @@ import json, os, re
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from jinja2 import FileSystemLoader, Environment
+from datetime import datetime
+
def group (items, key=lambda x: x):
ret = []
@@ -33,28 +35,38 @@ def main(args):
tmpath = os.path.join(tmpath, "data", "templates")
env = Environment(loader=FileSystemLoader(tmpath))
- template = env.get_template("pad_index.html")
+ template = env.get_template("index.html")
+
+    def base (x):
+        # Strip one known output suffix from the END of the path only. The
+        # alternatives are grouped so that "$" anchors every one of them;
+        # ungrouped, "$" applied to the last alternative alone and the other
+        # suffixes were removed wherever they occurred in the path.
+        return re.sub(r"(\.raw\.html|\.diff\.html|\.meta\.json|\.raw\.txt)$", "", x)
inputs = args.input
inputs.sort()
- inputs = [x for x in inputs if os.path.isdir(x)]
+ inputs = group(inputs, base)
- def base (x):
- return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x)
+ def loadmeta(paths):
+ # Return the parsed JSON of the first path in `paths` ending in ".meta.json".
+ # When no path has that suffix the loop falls through and the function
+ # implicitly returns None, so callers may receive None entries.
+ for p in paths:
+ if p.endswith(".meta.json"):
+ with open(p) as f:
+ return json.load(f)
+
+ inputs = map(loadmeta, inputs)
+ # TODO: sort by last edited (reverse); no sorting is performed here yet
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+ print (template.render({"timestamp": timestamp, "pads": inputs}).encode("utf-8"))
# TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
# evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
#
- print ("
")
- for x in inputs:
- padid = x
- metapath = os.path.join(x, "{0}.meta.json".format(padid))
- if os.path.exists(metapath):
- print ("""