#!/usr/bin/env python
"""
Dump a CSV of all pads to stdout, one row per pad, with the columns

    padid, groupid, revisions, lastedited, author_ids

* padid has its group prefix ("g.<group>$") trimmed
* groupid is the group name without the surrounding "g." and "$"
* revisions is an integral number of edits
* lastedited is ISO8601 formatted
* author_ids is a space delimited list of internal author IDs
"""
from __future__ import print_function

from argparse import ArgumentParser
from contextlib import closing
from csv import writer
from datetime import datetime
import json
import re
import sys

try:  # Python 3 module layout
    from urllib.error import HTTPError, URLError
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import urlopen, HTTPError, URLError

# Matches the "g.<group>$" prefix of a group pad id; group(1) is the group name.
groupnamepat = re.compile(r"^g\.(\w+)\$")

# All rows are written to standard output.
out = writer(sys.stdout)

# Column titles, in the same order as the values produced by _pad_row().
_HEADER = ("padid", "groupid", "revisions", "lastedited", "author_ids")

_PY2 = sys.version_info[0] == 2


def _cell(text):
    """Return *text* in the string type the csv writer expects.

    The Python 2 csv module works on byte strings, so text is UTF-8 encoded
    there; on Python 3 the writer takes text unchanged.
    """
    return text.encode("utf-8") if _PY2 else text


def jsonload (url):
    """Fetch *url* and return its body parsed as JSON.

    The response is closed even when reading it fails.
    """
    with closing(urlopen(url)) as f:
        data = f.read()
    if not isinstance(data, str):
        # Python 3 hands back bytes; decode so older json.loads accepts it.
        data = data.decode("utf-8")
    return json.loads(data)


def _pad_row(apiurl, data, padid):
    """Query the API for one pad and return its CSV row as a tuple.

    *data* is the shared query-parameter dict; its 'padID' entry is
    overwritten with the current pad id.
    """
    m = groupnamepat.match(padid)
    if m:
        groupname = m.group(1)
        padidnogroup = padid[m.end():]
    else:
        groupname = u""
        padidnogroup = padid

    data['padID'] = padid.encode("utf-8")
    query = urlencode(data)
    revisions = jsonload(apiurl+'getRevisionsCount?'+query)['data']['revisions']
    lastedited_raw = jsonload(apiurl+'getLastEdited?'+query)['data']['lastEdited']
    # The raw value is divided by 1000 (i.e. taken to be milliseconds) before
    # being read as a POSIX timestamp; fromtimestamp() yields naive local time.
    lastedited_iso = datetime.fromtimestamp(int(lastedited_raw) // 1000).isoformat()
    author_ids = jsonload(apiurl+'listAuthorsOfPad?'+query)['data']['authorIDs']
    author_ids = u" ".join(author_ids)
    return (_cell(padidnogroup), _cell(groupname), revisions, lastedited_iso, _cell(author_ids))


def main (args):
    """Write a CSV listing of every pad to stdout.

    *args* is the list of command-line arguments (without the program name).
    Connection details and the API key are read from the JSON file given by
    --padinfo. Each pad id is echoed to stderr as it is processed.
    """
    p = ArgumentParser("")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    # NOTE: --format is accepted but never read below; output is always CSV.
    p.add_argument("--format", default="csv", help="output format: csv (default), json")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl+'listAllPads?'+urlencode(data)

    results = jsonload(requesturl)['data']['padIDs']
    results.sort()
    # Header order must match the tuples returned by _pad_row().
    out.writerow(_HEADER)
    for padid in results:
        print (u"{0}".format(padid), file=sys.stderr)
        out.writerow(_pad_row(apiurl, data, padid))