etherpump/etherdump/commands/sync.py


								#!/usr/bin/env python

								from __future__ import print_function

								from argparse import ArgumentParser

								import sys, json, re, os

								from datetime import datetime

								from urllib import urlencode

								from urllib2 import urlopen, HTTPError, URLError

								from math import ceil, floor

								from common import *


								"""

								sync(meta):

								    Update metadata files for pads that have changed.

								    Changed pads are detected by comparing each pad's current revision count with the one stored in its existing .meta.json file.


								"""


								def jsonload (url):
								    """Fetch url and return its body decoded as JSON.

								    The response object is closed even when reading fails, so a failed
								    read does not leak the underlying handle.  Errors raised by urlopen
								    (e.g. HTTPError) and by json.loads propagate to the caller.
								    """
								    response = urlopen(url)
								    try:
								        payload = response.read()
								    finally:
								        # urllib2 responses are not context managers, so close explicitly.
								        response.close()
								    return json.loads(payload)


								def load_padinfo(p):
								    """Read the JSON file at path p and return the decoded object."""
								    with open(p) as settings_file:
								        return json.load(settings_file)


								def _api_data(info, function, params):
								    """Call one HTTP API function and return the 'data' member of its JSON reply.

								    The request URL is info['apiurl'] + function + '?' + the url-encoded params.
								    """
								    return jsonload(info['apiurl'] + function + '?' + urlencode(params))['data']


								def _changed_meta(info, data, padid, pathbase, metapath, zerorevs):
								    """Build fresh metadata for one pad, or return None when it should be skipped.

								    A pad is skipped when metapath exists and records the same revision
								    count the server reports, or when the pad has zero revisions and
								    zerorevs is false.  HTTPError from any API call propagates so the
								    caller can retry.
								    """
								    revisions = None
								    if os.path.exists(metapath):
								        with open(metapath) as f:
								            saved = json.load(f)
								        revisions = _api_data(info, 'getRevisionsCount', data)['revisions']
								        if saved['revisions'] == revisions:
								            return None
								    if revisions is None:
								        revisions = _api_data(info, 'getRevisionsCount', data)['revisions']
								    if revisions == 0 and not zerorevs:
								        return None

								    meta = {'padid': padid.encode("utf-8"), 'revisions': revisions}
								    # todo: load more metadata!
								    meta['pad'], meta['group'] = splitpadname(padid)
								    meta['pathbase'] = pathbase
								    meta['lastedited_raw'] = int(_api_data(info, 'getLastEdited', data)['lastEdited'])
								    # lastEdited is divided by 1000 before being handed to fromtimestamp.
								    meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
								    meta['author_ids'] = _api_data(info, 'listAuthorsOfPad', data)['authorIDs']
								    return meta


								def main (args):
								    """List pads whose revision count differs from their saved .meta.json.

								    args is the list of command-line argument strings handed to
								    ArgumentParser.parse_args.  For every changed pad the pad id is printed
								    to stdout; depending on --meta/--text/--html/--dhtml/--all the matching
								    files are written next to the pad's path base.  Progress and the final
								    count of changed pads go to stderr.  Returns None.

								    Each pad's metadata fetch is attempted up to three times on HTTPError;
								    after that the pad is reported and skipped.
								    """
								    # The text is passed as description=; passed positionally it would
								    # become the program name and garble the usage line.
								    parser = ArgumentParser(description="Check for pads that have changed since last sync (according to .meta.json)")
								    parser.add_argument("padid", nargs="*", default=[])
								    parser.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
								    parser.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
								    parser.add_argument("--pub", default=".", help="folder to store files for public pads, default: .")
								    parser.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
								    parser.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
								    parser.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
								    parser.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
								    parser.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
								    parser.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False")
								    parser.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
								    args = parser.parse_args(args)

								    info = load_padinfo(args.padinfo)
								    # Query parameters shared by every API call; 'padID' is set per pad below.
								    data = {'apikey': info['apikey']}

								    if args.padid:
								        # NOTE(review): ids given on the command line are byte strings on
								        # Python 2; non-ASCII ids would presumably fail at .encode() below.
								        padids = args.padid
								    else:
								        padids = _api_data(info, 'listAllPads', data)['padIDs']
								    padids.sort()
								    numpads = len(padids)
								    want_files = args.all or args.meta or args.text or args.html or args.dhtml
								    count = 0

								    for i, padid in enumerate(padids):
								        if args.skip is not None and i < args.skip:
								            continue

								        # 20-character progress bar, redrawn in place via the leading "\r".
								        fraction = float(i) / numpads
								        bars = int(ceil(fraction*20))
								        bar = ("*"*bars) + ("-"*(20-bars))
								        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
								        sys.stderr.write(msg.encode("utf-8"))
								        sys.stderr.flush()

								        data['padID'] = padid.encode("utf-8")
								        pathbase = padpath(padid, args.pub, args.group)
								        metapath = pathbase + ".meta.json"

								        meta = None
								        for _attempt in range(3):
								            try:
								                meta = _changed_meta(info, data, padid, pathbase, metapath, args.zerorevs)
								                break
								            except HTTPError:
								                pass
								        else:
								            # Unicode template: a byte-string template would fail to
								            # format a non-ASCII unicode pad id on Python 2.
								            print(u"Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
								        if meta is None:
								            continue

								        count += 1
								        print(padid.encode("utf-8"))

								        if want_files:
								            try:
								                os.makedirs(os.path.split(metapath)[0])
								            except OSError:
								                # Best effort: the folder usually exists already; a real
								                # failure surfaces when the files are opened below.
								                pass

								        if args.all or args.meta:
								            with open(metapath, "w") as f:
								                json.dump(meta, f)

								        if args.all or args.text:
								            text = _api_data(info, 'getText', data)['text']
								            with open(pathbase+".txt", "w") as f:
								                f.write(text.encode("utf-8"))

								        if args.all or args.html:
								            html = _api_data(info, 'getHTML', data)['html']
								            with open(pathbase+".html", "w") as f:
								                f.write(html.encode("utf-8"))

								        if args.all or args.dhtml:
								            # Use a copy so startRev does not leak into the API calls
								            # made for the following pads.
								            diff_params = dict(data, startRev="0")
								            html = _api_data(info, 'createDiffHTML', diff_params)['html']
								            with open(pathbase+".diff.html", "w") as f:
								                f.write(html.encode("utf-8"))

								    print("\n{0} pad(s) changed".format(count), file=sys.stderr)