Pumping pads as files into publishing frameworks!
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

84 lines
2.9 KiB

from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from csv import writer
from math import ceil, floor
"""
Dumps a CSV of all pads with columns
padid, groupid, revisions, lastedited, author_ids
padid is the pad ID with its group prefix removed
groupid is the group part of the pad ID, without the leading "g." and trailing "$"
revisions is an integral number of edits
lastedited is ISO8601 formatted
author_ids is a space delimited list of internal author IDs
"""
# CSV writer shared by the whole script; rows go straight to standard output.
out = writer(sys.stdout)

# Matches the leading "g.<word characters>$" group prefix of a pad ID and
# captures the part between "g." and "$".
groupnamepat = re.compile(r"^g\.(\w+)\$")
def jsonload(url):
    """Fetch *url* and return its response body parsed as JSON.

    The response handle is closed even when reading the body fails, so a
    failed request does not leak the underlying connection. Errors raised by
    ``urlopen``, ``read`` or ``json.loads`` propagate to the caller unchanged.
    """
    f = urlopen(url)
    try:
        data = f.read()
    finally:
        # Always release the handle, including when read() raises.
        f.close()
    return json.loads(data)
def main(args):
    """Write one CSV row per pad to standard output.

    The settings file named by ``--padinfo`` is read as JSON and must provide
    ``apiurl`` and ``apikey``. All pad IDs are fetched with ``listAllPads``,
    sorted, and then for each pad the revision count, last-edited time and
    author IDs are requested. Pads with zero revisions are skipped unless
    ``--zerorevs`` is given. A progress bar and a final row count are written
    to standard error.

    Columns: padid, groupid, revisions, lastedited, author_ids.

    args: list of command-line argument strings for this command.
    """
    # NOTE(review): the text is passed positionally, which argparse treats as
    # ``prog`` rather than ``description``; kept as-is to leave usage output
    # unchanged.
    parser = ArgumentParser("outputs a CSV of information about all pads")
    parser.add_argument(
        "--padinfo",
        default=".etherpump/settings.json",
        help="settings, default: .etherpump/settings.json",
    )
    parser.add_argument(
        "--zerorevs",
        default=False,
        action="store_true",
        help="include pads with zero revisions, default: False",
    )
    args = parser.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")

    # Query parameters shared by every request; 'padID' is replaced per pad.
    data = {}
    data['apikey'] = info['apikey']

    requesturl = apiurl + 'listAllPads?' + urlencode(data)
    padids = jsonload(requesturl)['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    maxmsglen = 0
    count = 0

    # Header order must match the data rows written below.
    out.writerow(("padid", "groupid", "revisions", "lastedited", "author_ids"))

    for i, padid in enumerate(padids):
        # Progress bar of 20 cells, redrawn in place on stderr.
        fraction = float(i) / numpads
        bars = int(ceil(fraction * 20))
        bar = ("*" * bars) + ("-" * (20 - bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid)
        maxmsglen = max(maxmsglen, len(msg))
        # Blank out the longest message seen so far before drawing the new one,
        # so a shorter pad ID leaves no residue from a longer one.
        sys.stderr.write("\r{0}".format(" " * maxmsglen))
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()

        # Split "g.<group>$<name>" into group and bare pad name.
        m = groupnamepat.match(padid)
        if m:
            groupname = m.group(1)
            padidnogroup = padid[m.end():]
        else:
            groupname = u""
            padidnogroup = padid

        data['padID'] = padid.encode("utf-8")
        revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))['data']['revisions']
        if (revisions == 0) and not args.zerorevs:
            continue

        lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))['data']['lastEdited']
        # Presumably milliseconds since the epoch: reduced to whole seconds
        # before conversion (floor division, same result as the Python 2 "/").
        lastedited_iso = datetime.fromtimestamp(int(lastedited_raw) // 1000).isoformat()

        author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))['data']['authorIDs']
        author_ids = u" ".join(author_ids).encode("utf-8")

        out.writerow((
            padidnogroup.encode("utf-8"),
            groupname.encode("utf-8"),
            revisions,
            lastedited_iso,
            author_ids,
        ))
        count += 1

    print("\nWrote {0} rows...".format(count), file=sys.stderr)