etherpump/etherdump/commands/dumpcsv.py

85 lines
2.9 KiB
Python
Raw Normal View History

2015-11-12 12:41:12 +01:00
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from csv import writer
2015-11-13 11:03:57 +01:00
from math import ceil, floor
2015-11-12 12:41:12 +01:00
"""
Dumps a CSV of all pads with columns
padid, groupid, revisions, lastedited, author_ids
padids have their group name trimmed
groupid is without (g. $)
revisions is an integral number of edits
lastedited is ISO8601 formatted
author_ids is a space delimited list of internal author IDs
"""
groupnamepat = re.compile(r"^g\.(\w+)\$")
out = writer(sys.stdout)
def jsonload(url):
    """Fetch *url* over HTTP and return its body parsed as JSON.

    url -- full request URL (string), typically an etherpad API endpoint.
    Returns the decoded JSON object.
    Raises urllib2.HTTPError / urllib2.URLError on network failure, and
    ValueError if the response body is not valid JSON.
    """
    f = urlopen(url)
    # try/finally ensures the response object is closed even if read()
    # raises; the original leaked the connection on a failed read.
    try:
        data = f.read()
    finally:
        f.close()
    return json.loads(data)
def main(args):
    """Write a CSV of all pads on the configured etherpad instance to stdout.

    args -- list of command-line argument strings (as from sys.argv[1:]).
    Reads API credentials from the --padinfo settings file, queries the
    etherpad HTTP API (listAllPads, getRevisionsCount, getLastEdited,
    listAuthorsOfPad), and writes one CSV row per pad via the module-level
    ``out`` writer.  A textual progress bar is written to stderr.
    Raises IOError if the settings file is missing, KeyError if it lacks
    an "apikey", and propagates network/JSON errors from jsonload().
    """
    p = ArgumentParser("outputs a CSV of information all all pads")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")
    # data carries the apikey on every request; padID is added per-pad below.
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl+'listAllPads?'+urlencode(data)
    padids = jsonload(requesturl)['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    maxmsglen = 0  # widest progress line so far, used to blank the tty line
    count = 0      # rows actually written (pads skipped for zero revs excluded)
    # BUG FIX: the header previously listed "lastedited" before "revisions",
    # but data rows are written as (padid, groupid, revisions, lastedited,
    # author_ids) — the order the module docstring documents.
    out.writerow(("padid", "groupid", "revisions", "lastedited", "author_ids"))
    for i, padid in enumerate(padids):
        # Progress bar on stderr: 20-char bar plus "i/total padid".
        frac = float(i) / numpads
        percentage = int(floor(frac*100))
        bars = int(ceil(frac*20))
        bar = ("*"*bars) + ("-"*(20-bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
        if len(msg) > maxmsglen:
            maxmsglen = len(msg)
        # Overwrite the previous (possibly longer) line with spaces first.
        sys.stderr.write("\r{0}".format(" "*maxmsglen))
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()
        # Split "g.<group>$<name>" pad IDs into group and bare pad name.
        m = groupnamepat.match(padid)
        if m:
            groupname = m.group(1)
            padidnogroup = padid[m.end():]
        else:
            groupname = u""
            padidnogroup = padid
        data['padID'] = padid.encode("utf-8")
        revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
        if (revisions == 0) and not args.zerorevs:
            continue
        # lastEdited is a JS-style epoch in milliseconds; convert to ISO8601.
        lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
        lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
        author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
        author_ids = u" ".join(author_ids).encode("utf-8")
        out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
        count += 1

    print("\nWrote {0} rows...".format(count), file=sys.stderr)
2015-11-12 12:41:12 +01:00