"""
Dumps a CSV of all pads with columns
    padid, groupid, revisions, lastedited, author_ids

padids have their group name trimmed
groupid is without (g. $)
revisions is an integral number of edits
lastedited is ISO8601 formatted
author_ids is a space delimited list of internal author IDs
"""
from __future__ import print_function

from argparse import ArgumentParser
import sys
import json
import re
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from csv import writer
from math import ceil, floor


# Matches a leading "g.<groupid>$" prefix on a pad id; group 1 is the bare
# group id (without the "g." and the "$").
groupnamepat = re.compile(r"^g\.(\w+)\$")

# All CSV rows (header included) go to standard output.
out = writer(sys.stdout)


def jsonload (url):
    """Fetch *url* and return its body parsed as JSON.

    The response object is closed even when reading it fails.
    """
    f = urlopen(url)
    try:
        data = f.read()
    finally:
        f.close()
    return json.loads(data)


def main (args):
    """Write one CSV row per pad to stdout, with progress on stderr.

    args is the list of command line arguments (without the program name).
    Supported options:
      --padinfo   path of a JSON settings file providing "apiurl" and "apikey"
      --zerorevs  also output pads that have zero revisions

    For every pad id returned by the API's listAllPads call, the revision
    count, last edit time and author ids are requested and written as a row
    (padid, groupid, revisions, lastedited, author_ids).
    """
    parser = ArgumentParser(description="outputs a CSV of information on all pads")
    parser.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: %(default)s")
    parser.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
    args = parser.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")
    # Query parameters shared by every API call; 'padID' is replaced per pad.
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl+'listAllPads?'+urlencode(data)

    padids = jsonload(requesturl)['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    maxmsglen = 0
    count = 0
    # Header order must match the order of the values written for each row.
    out.writerow(("padid", "groupid", "revisions", "lastedited", "author_ids"))
    for i, padid in enumerate(padids):
        # Progress display: a 20 character bar followed by "i/total padid".
        progress = float(i) / numpads
        bars = int(ceil(progress*20))
        bar = ("*"*bars) + ("-"*(20-bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
        if len(msg) > maxmsglen:
            maxmsglen = len(msg)
        # Blank out the longest message written so far before redrawing, so
        # that a shorter pad id leaves no stale characters on the line.
        sys.stderr.write("\r{0}".format(" "*maxmsglen))
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()

        # Split "g.<groupid>$<name>" into its group id and bare pad name.
        m = groupnamepat.match(padid)
        if m:
            groupname = m.group(1)
            padidnogroup = padid[m.end():]
        else:
            groupname = u""
            padidnogroup = padid

        data['padID'] = padid.encode("utf-8")
        revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
        if (revisions == 0) and not args.zerorevs:
            continue

        lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
        # The raw value is divided by 1000 (whole seconds) before conversion;
        # fromtimestamp yields a naive datetime in local time.
        lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)//1000).isoformat()
        author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
        author_ids = u" ".join(author_ids).encode("utf-8")
        out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
        count += 1

    print("\nWrote {0} rows...".format(count), file=sys.stderr)