diff --git a/etherdump/commands/common.py b/etherdump/commands/common.py
index b8d6194..90a21fd 100644
--- a/etherdump/commands/common.py
+++ b/etherdump/commands/common.py
@@ -1,6 +1,7 @@
-import re, os
+import re, os, json, sys
 from urllib import quote_plus, unquote_plus
-
+from math import ceil, floor
+from urllib2 import urlopen
 
 groupnamepat = re.compile(r"^g\.(\w+)\$")
 def splitpadname (padid):
@@ -23,3 +24,49 @@ def padpath (padid, pub_path=u"", group_path=u""):
         return os.path.join(group_path, g, p)
     else:
         return os.path.join(pub_path, p)
+
+def padpath2id (path):
+    """Turn a pad file path back into a pad id.
+
+    The last directory of path, when there is one, is used as the group
+    name and joined with "$" to the URL-unquoted file name. Accepts str or
+    unicode; returns unicode.
+    """
+    if isinstance(path, unicode):
+        path = path.encode("utf-8")
+    dd, p = os.path.split(path)
+    gname = dd.split("/")[-1]
+    p = unquote_plus(p)
+    if gname:
+        return "{0}${1}".format(gname, p).decode("utf-8")
+    else:
+        return p.decode("utf-8")
+
+def getjson (url):
+    """Fetch url and return its body parsed as JSON."""
+    f = urlopen(url)
+    try:
+        return json.loads(f.read())
+    finally:
+        # release the connection even when reading or parsing fails
+        f.close()
+
+def loadpadinfo(p):
+    """Return the parsed JSON content of the pad info file at path p."""
+    with open(p) as f:
+        return json.load(f)
+
+def progressbar (i, num, label="", file=sys.stderr):
+    """Write a 20-character progress bar line for item i (0-based) of num.
+
+    The message begins with a carriage return so that successive calls
+    overwrite one another; it is UTF-8 encoded, written to file and flushed.
+    """
+    # an empty run (num == 0) counts as complete instead of dividing by zero
+    p = float(i) / num if num else 1.0
+    bars = int(ceil(p*20))
+    bar = ("*"*bars) + ("-"*(20-bars))
+    msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)
+    # write to the requested stream; it used to be ignored for sys.stderr
+    file.write(msg.encode("utf-8"))
+    file.flush()
diff --git a/etherdump/commands/sync.py b/etherdump/commands/pull.py
similarity index 74%
rename from etherdump/commands/sync.py
rename to etherdump/commands/pull.py
index fee7ff5..18b3610 100644
--- a/etherdump/commands/sync.py
+++ b/etherdump/commands/pull.py
@@ -4,29 +4,16 @@ from argparse import ArgumentParser
 import sys, json, re, os
 from datetime import datetime
 from urllib import urlencode
-from urllib2 import urlopen, HTTPError, URLError
-from math import ceil, floor
+from urllib2 import HTTPError
 from common import *
 
 """
-sync(meta):
+pull(meta):
     Update meta data files for those that have changed.
Check for changed pads by looking at revisions & comparing to existing """ -def jsonload (url): - f = urlopen(url) - data = f.read() - f.close() - return json.loads(data) - -def load_padinfo(p): - with open(p) as f: - info = json.load(f) - return info - - def main (args): p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") p.add_argument("padid", nargs="*", default=[]) @@ -42,14 +29,14 @@ def main (args): p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") args = p.parse_args(args) - info = load_padinfo(args.padinfo) + info = loadpadinfo(args.padinfo) data = {} data['apikey'] = info['apikey'] if args.padid: padids = args.padid else: - padids = jsonload(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] + padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids.sort() numpads = len(padids) # maxmsglen = 0 @@ -57,16 +44,7 @@ def main (args): for i, padid in enumerate(padids): if args.skip != None and i maxmsglen: - # maxmsglen = len(msg) - # sys.stderr.write("\r{0}".format(" "*maxmsglen)) - sys.stderr.write(msg.encode("utf-8")) - sys.stderr.flush() + progressbar(i, numpads, padid) data['padID'] = padid.encode("utf-8") p = padpath(padid, args.pub, args.group) @@ -79,14 +57,14 @@ def main (args): if os.path.exists(metapath): with open(metapath) as f: meta = json.load(f) - revisions = jsonload(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] + revisions = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] if meta['revisions'] == revisions: skip=True break meta = {'padid': padid.encode("utf-8")} if revisions == None: - meta['revisions'] = jsonload(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] + meta['revisions'] = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] else: meta['revisions' ] = 
revisions @@ -98,9 +76,9 @@ def main (args): # todo: load more metadata! meta['pad'], meta['group'] = splitpadname(padid) meta['pathbase'] = p - meta['lastedited_raw'] = int(jsonload(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) + meta['lastedited_raw'] = int(getjson(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() - meta['author_ids'] = jsonload(info['apiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] + meta['author_ids'] = getjson(info['apiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] break except HTTPError as e: tries += 1 @@ -128,20 +106,20 @@ def main (args): # Process text, html, dhtml, all options if args.all or args.text: - text = jsonload(info['apiurl']+'getText?'+urlencode(data)) + text = getjson(info['apiurl']+'getText?'+urlencode(data)) text = text['data']['text'] with open(p+".txt", "w") as f: f.write(text.encode("utf-8")) if args.all or args.html: - html = jsonload(info['apiurl']+'getHTML?'+urlencode(data)) + html = getjson(info['apiurl']+'getHTML?'+urlencode(data)) html = html['data']['html'] with open(p+".html", "w") as f: f.write(html.encode("utf-8")) if args.all or args.dhtml: data['startRev'] = "0" - html = jsonload(info['apiurl']+'createDiffHTML?'+urlencode(data)) + html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data)) html = html['data']['html'] with open(p+".diff.html", "w") as f: f.write(html.encode("utf-8"))