added dumpcsv command

This commit is contained in:
Michael Murtaugh 2015-11-12 12:41:12 +01:00
parent 0f340433ae
commit 5a14737737

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from csv import writer
"""
Dumps a CSV of all pads with columns
padid, groupid, revisions, lastedited, author_ids
padid has its leading group prefix (g.<groupid>$) removed
groupid is the group name without the surrounding 'g.' prefix and '$' suffix
revisions is an integral number of edits
lastedited is ISO8601 formatted
author_ids is a space delimited list of internal author IDs
"""
# Matches a leading "g.<groupid>$" prefix on a pad ID; group(1) captures <groupid>.
groupnamepat = re.compile(r"^g\.(\w+)\$")
# CSV writer bound to stdout when the module is loaded; main() writes every row through it.
out = writer(sys.stdout)
def jsonload (url):
    """
    Fetch url and return its body parsed as JSON.

    url: anything accepted by urlopen.
    Returns the decoded JSON value.
    Errors raised by urlopen, read, or json.loads propagate to the caller.
    """
    f = urlopen(url)
    try:
        # try/finally rather than "with": the response handle must be closed
        # even when read() fails, and it is not guaranteed to support the
        # context-manager protocol on every Python version.
        data = f.read()
    finally:
        f.close()
    return json.loads(data)
def main (args):
    """
    Write a CSV listing of all pads to stdout, one row per pad.

    args: list of command-line argument strings, handed to ArgumentParser.
        --padinfo  path of a JSON file providing the keys protocol, hostname,
                   port, apiurl, apiversion and apikey (default: padinfo.json)
        --format   accepted, but not consulted by the code below; the output
                   is always CSV

    Columns: padid, groupid, revisions, lastedited, author_ids.
    Each pad ID is also echoed to stderr as it is processed.
    Errors from reading the padinfo file or from the API requests propagate.
    """
    p = ArgumentParser("")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--format", default="csv", help="output format: csv (default), json")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)

    # Query parameters shared by every request; 'padID' is replaced per pad below.
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl+'listAllPads?'+urlencode(data)

    results = jsonload(requesturl)['data']['padIDs']
    results.sort()

    # Header order must match the row tuple written at the end of the loop
    # (revisions before lastedited).
    out.writerow(("padid", "groupid", "revisions", "lastedited", "author_ids"))
    for padid in results:
        # Progress goes to stderr so that it does not mix with the CSV on stdout.
        print (u"{0}".format(padid), file=sys.stderr)

        # Split "g.<groupid>$<name>" into its group and pad name parts;
        # pads without a group prefix get an empty groupid.
        m = groupnamepat.match(padid)
        if m:
            groupname = m.group(1)
            padidnogroup = padid[m.end():]
        else:
            groupname = u""
            padidnogroup = padid

        data['padID'] = padid.encode("utf-8")
        revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']

        lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
        # The value is divided by 1000 before use as a timestamp, i.e. it is
        # treated as milliseconds; // keeps whole seconds. fromtimestamp()
        # without a tz argument yields a naive local-time datetime.
        lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)//1000).isoformat()

        author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
        author_ids = u" ".join(author_ids).encode("utf-8")

        out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))