#!/usr/bin/env python from __future__ import print_function from argparse import ArgumentParser import sys, json, re, os from datetime import datetime from urllib import urlencode from urllib2 import urlopen, HTTPError, URLError from math import ceil, floor from common import * """ sync(meta): Update meta data files for those that have changed. Check for changed pads by looking at revisions & comparing to existing """ def jsonload (url): f = urlopen(url) data = f.read() f.close() return json.loads(data) def load_padinfo(p): with open(p) as f: info = json.load(f) return info def main (args): p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") p.add_argument("padid", nargs="*", default=[]) p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub") p.add_argument("--group", default="g", help="folder to store files for group pads, default: g") p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False") p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False") p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False") p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") args = p.parse_args(args) info = load_padinfo(args.padinfo) data = {} data['apikey'] = info['apikey'] if args.padid: padids = args.padid else: padids = jsonload(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids.sort() numpads = len(padids) # maxmsglen = 0 count = 0 for i, padid in enumerate(padids): if args.skip != None and i maxmsglen: # maxmsglen = len(msg) # sys.stderr.write("\r{0}".format(" "*maxmsglen)) sys.stderr.write(msg.encode("utf-8")) sys.stderr.flush() data['padID'] = padid.encode("utf-8") p = padpath(padid, args.pub, args.group) metapath = p + ".meta.json" revisions = None tries = 1 skip = False while True: try: if os.path.exists(metapath): with open(metapath) as f: meta = json.load(f) revisions = jsonload(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] if meta['revisions'] == revisions: skip=True break meta = {'padid': padid.encode("utf-8")} if revisions == None: meta['revisions'] = jsonload(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] else: meta['revisions' ] = revisions if (meta['revisions'] == 0) and (not args.zerorevs): # print("Skipping zero revs", file=sys.stderr) skip=True break # todo: load more metadata! meta['pad'], meta['group'] = splitpadname(padid) meta['pathbase'] = p meta['lastedited_raw'] = int(jsonload(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() meta['author_ids'] = jsonload(info['apiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] break except HTTPError as e: tries += 1 if tries > 3: print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr) skip=True break if skip: continue count += 1 print (padid.encode("utf-8")) if args.all or (args.meta or args.text or args.html or args.dhtml): try: os.makedirs(os.path.split(metapath)[0]) except OSError: pass if args.all or args.meta: with open(metapath, "w") as f: json.dump(meta, f) # Process text, html, dhtml, all options if args.all or args.text: text = jsonload(info['apiurl']+'getText?'+urlencode(data)) text = text['data']['text'] with open(p+".txt", "w") as f: f.write(text.encode("utf-8")) if args.all or args.html: html = jsonload(info['apiurl']+'getHTML?'+urlencode(data)) html = html['data']['html'] with open(p+".html", "w") as f: f.write(html.encode("utf-8")) if args.all or args.dhtml: data['startRev'] = "0" html = jsonload(info['apiurl']+'createDiffHTML?'+urlencode(data)) html = html['data']['html'] with open(p+".diff.html", "w") as f: f.write(html.encode("utf-8")) print("\n{0} pad(s) changed".format(count), file=sys.stderr)