diff --git a/etherpump/commands/pull.py b/etherpump/commands/pull.py
index 82fb627..2ba0281 100644
--- a/etherpump/commands/pull.py
+++ b/etherpump/commands/pull.py
@@ -163,19 +163,7 @@ def build_argument_parser(args):
     return parser
 
-
-def main(args):
-    p = build_argument_parser(args)
-    args = p.parse_args(args)
-
-    raw_ext = ".raw.txt"
-    if args.no_raw_ext:
-        raw_ext = ""
-
-    info = loadpadinfo(args.padinfo)
-    data = {}
-    data['apikey'] = info['apikey']
-
+def get_padids(args, info, data):
     if args.padid:
         padids = args.padid
     elif args.glob:
@@ -188,258 +176,208 @@ def main(args):
             info['localapiurl'] + 'listAllPads?' + urlencode(data)
         )['data']['padIDs']
     padids.sort()
-    numpads = len(padids)
-    # maxmsglen = 0
-    count = 0
-
-    progress_kwargs = {}
-    if not istty():
-        progress_kwargs.update(dict(disable=True))
-    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
+    return padids
 
-    for i, padid in enumerate(progress_pads):
-        if args.skip != None and i < args.skip:
-            continue
-        data['padID'] = padid
-        p = padpath(padid, args.pub, args.group, args.fix_names)
-        if args.folder:
-            p = os.path.join(p, padid)
 
+def handle_pad(args, index, padid, data, info, raw_ext):
+    if args.skip != None and index < args.skip:
+        return
-        metapath = p + ".meta.json"
-        revisions = None
-        tries = 1
-        skip = False
-        padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
-        meta = {}
+    data['padID'] = padid
+    p = padpath(padid, args.pub, args.group, args.fix_names)
+    if args.folder:
+        p = os.path.join(p, padid)
 
-        while True:
-            try:
-                if os.path.exists(metapath):
-                    with open(metapath) as f:
-                        meta.update(json.load(f))
-                    revisions = getjson(
-                        info['localapiurl']
-                        + 'getRevisionsCount?'
-                        + urlencode(data)
-                    )['data']['revisions']
-                    if meta['revisions'] == revisions and not args.force:
-                        skip = True
-                        break
-
-                meta['padid'] = padid
-                versions = meta["versions"] = []
-                versions.append(
-                    {
-                        "url": padurlbase + quote(padid),
-                        "type": "pad",
-                        "code": 200,
-                    }
-                )
-
-                if revisions == None:
-                    meta['revisions'] = getjson(
-                        info['localapiurl']
-                        + 'getRevisionsCount?'
-                        + urlencode(data)
-                    )['data']['revisions']
-                else:
-                    meta['revisions'] = revisions
+    metapath = p + ".meta.json"
+    revisions = None
+    tries = 1
+    skip = False
+    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
+    meta = {}
 
-                if (meta['revisions'] == 0) and (not args.zerorevs):
+    while True:
+        try:
+            if os.path.exists(metapath):
+                with open(metapath) as f:
+                    meta.update(json.load(f))
+                revisions = getjson(
+                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
+                )['data']['revisions']
+                if meta['revisions'] == revisions and not args.force:
                     skip = True
                     break
 
-                # todo: load more metadata!
-                meta['group'], meta['pad'] = splitpadname(padid)
-                meta['pathbase'] = p
-                meta['lastedited_raw'] = int(
-                    getjson(
-                        info['localapiurl'] + 'getLastEdited?' + urlencode(data)
-                    )['data']['lastEdited']
-                )
-                meta['lastedited_iso'] = datetime.fromtimestamp(
-                    int(meta['lastedited_raw']) / 1000
-                ).isoformat()
-                meta['author_ids'] = getjson(
-                    info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
-                )['data']['authorIDs']
+            meta['padid'] = padid
+            versions = meta["versions"] = []
+            versions.append(
+                {"url": padurlbase + quote(padid), "type": "pad", "code": 200,}
+            )
+
+            if revisions is None:
+                meta['revisions'] = getjson(
+                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
+                )['data']['revisions']
+            else:
+                meta['revisions'] = revisions
+
+            if (meta['revisions'] == 0) and (not args.zerorevs):
+                skip = True
                 break
-            except HTTPError as e:
-                tries += 1
-                if tries > 3:
-                    print(
-                        "Too many failures ({0}), skipping".format(padid),
-                        file=sys.stderr,
-                    )
-                    skip = True
-                    break
-                else:
-                    sleep(3)
-            except TypeError as e:
+
+            # todo: load more metadata!
+            meta['group'], meta['pad'] = splitpadname(padid)
+            meta['pathbase'] = p
+            meta['lastedited_raw'] = int(
+                getjson(
+                    info['localapiurl'] + 'getLastEdited?' + urlencode(data)
+                )['data']['lastEdited']
+            )
+            meta['lastedited_iso'] = datetime.fromtimestamp(
+                int(meta['lastedited_raw']) / 1000
+            ).isoformat()
+            meta['author_ids'] = getjson(
+                info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
+            )['data']['authorIDs']
+            break
+        except HTTPError as e:
+            tries += 1
+            if tries > 3:
                 print(
-                    "Type Error loading pad {0} (phantom pad?), skipping".format(
-                        padid
-                    ),
+                    "Too many failures ({0}), skipping".format(padid),
                     file=sys.stderr,
                 )
                 skip = True
                 break
+            else:
+                sleep(3)
+        except TypeError as e:
+            print(
+                "Type Error loading pad {0} (phantom pad?), skipping".format(
+                    padid
+                ),
+                file=sys.stderr,
+            )
+            skip = True
+            break
 
-        if skip:
-            continue
+    if skip:
+        return
 
-        count += 1
+    if args.output:
+        print(padid)
 
-        if args.output:
-            print(padid)
+    if args.all or (args.meta or args.text or args.html or args.dhtml):
+        try:
+            os.makedirs(os.path.split(metapath)[0])
+        except OSError:
+            pass
 
-        if args.all or (args.meta or args.text or args.html or args.dhtml):
-            try:
-                os.makedirs(os.path.split(metapath)[0])
-            except OSError:
-                pass
-
-        if args.all or args.text:
-            text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
-            ver = {"type": "text"}
-            versions.append(ver)
-            ver["code"] = text["_code"]
-            if text["_code"] == 200:
-                text = text['data']['text']
-
-                ##########################################
-                ## ENFORCE __NOPUBLISH__ MAGIC WORD
-                ##########################################
-                if args.nopublish and args.nopublish in text:
-                    # NEED TO PURGE ANY EXISTING DOCS
-                    try_deleting(
-                        (
-                            p + raw_ext,
-                            p + ".raw.html",
-                            p + ".diff.html",
-                            p + ".meta.json",
-                        )
+    if args.all or args.text:
+        text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
+        ver = {"type": "text"}
+        versions.append(ver)
+        ver["code"] = text["_code"]
+        if text["_code"] == 200:
+            text = text['data']['text']
+
+            ##########################################
+            ## ENFORCE __NOPUBLISH__ MAGIC WORD
+            ##########################################
+            if args.nopublish and args.nopublish in text:
+                # NEED TO PURGE ANY EXISTING DOCS
+                try_deleting(
+                    (
+                        p + raw_ext,
+                        p + ".raw.html",
+                        p + ".diff.html",
+                        p + ".meta.json",
                     )
-                    continue
-
-                ##########################################
-                ## ENFORCE __PUBLISH__ MAGIC WORD
-                ##########################################
-                if args.publish_opt_in and args.publish not in text:
-                    try_deleting(
-                        (
-                            p + raw_ext,
-                            p + ".raw.html",
-                            p + ".diff.html",
-                            p + ".meta.json",
-                        )
+                )
+                return
+
+            ##########################################
+            ## ENFORCE __PUBLISH__ MAGIC WORD
+            ##########################################
+            if args.publish_opt_in and args.publish not in text:
+                try_deleting(
+                    (
+                        p + raw_ext,
+                        p + ".raw.html",
+                        p + ".diff.html",
+                        p + ".meta.json",
                     )
-                    continue
-
-                ver["path"] = p + raw_ext
-                ver["url"] = quote(ver["path"])
-                with open(ver["path"], "w") as f:
-                    f.write(text)
-                # once the content is settled, compute a hash
-                # and link it in the metadata!
-
-        links = []
-        if args.css:
-            links.append({"href": args.css, "rel": "stylesheet"})
-        # todo, make this process reflect which files actually were made
-        versionbaseurl = quote(padid)
+                )
+                return
+
+            ver["path"] = p + raw_ext
+            ver["url"] = quote(ver["path"])
+            with open(ver["path"], "w") as f:
+                f.write(text)
+            # once the content is settled, compute a hash
+            # and link it in the metadata!
+
+    links = []
+    if args.css:
+        links.append({"href": args.css, "rel": "stylesheet"})
+    # todo, make this process reflect which files actually were made
+    versionbaseurl = quote(padid)
+    links.append(
+        {
+            "href": versions[0]["url"],
+            "rel": "alternate",
+            "type": "text/html",
+            "title": "Etherpad",
+        }
+    )
+    if args.all or args.text:
+        links.append(
+            {
+                "href": versionbaseurl + raw_ext,
+                "rel": "alternate",
+                "type": "text/plain",
+                "title": "Plain text",
+            }
+        )
+    if args.all or args.html:
         links.append(
             {
-                "href": versions[0]["url"],
+                "href": versionbaseurl + ".raw.html",
                 "rel": "alternate",
                 "type": "text/html",
-                "title": "Etherpad",
+                "title": "HTML",
+            }
+        )
+    if args.all or args.dhtml:
+        links.append(
+            {
+                "href": versionbaseurl + ".diff.html",
+                "rel": "alternate",
+                "type": "text/html",
+                "title": "HTML with author colors",
+            }
+        )
+    if args.all or args.meta:
+        links.append(
+            {
+                "href": versionbaseurl + ".meta.json",
+                "rel": "alternate",
+                "type": "application/json",
+                "title": "Meta data",
             }
         )
-        if args.all or args.text:
-            links.append(
-                {
-                    "href": versionbaseurl + raw_ext,
-                    "rel": "alternate",
-                    "type": "text/plain",
-                    "title": "Plain text",
-                }
-            )
-        if args.all or args.html:
-            links.append(
-                {
-                    "href": versionbaseurl + ".raw.html",
-                    "rel": "alternate",
-                    "type": "text/html",
-                    "title": "HTML",
-                }
-            )
-        if args.all or args.dhtml:
-            links.append(
-                {
-                    "href": versionbaseurl + ".diff.html",
-                    "rel": "alternate",
-                    "type": "text/html",
-                    "title": "HTML with author colors",
-                }
-            )
-        if args.all or args.meta:
-            links.append(
-                {
-                    "href": versionbaseurl + ".meta.json",
-                    "rel": "alternate",
-                    "type": "application/json",
-                    "title": "Meta data",
-                }
-            )
-
-        # links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
-        if args.all or args.dhtml:
-            data['startRev'] = "0"
-            html = getjson(
-                info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
-            )
-            ver = {"type": "diffhtml"}
-            versions.append(ver)
-            ver["code"] = html["_code"]
-            if html["_code"] == 200:
-                try:
-                    html = html['data']['html']
-                    ver["path"] = p + ".diff.html"
-                    ver["url"] = quote(ver["path"])
-                    # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
-                    doc = html5lib.parse(
-                        html, treebuilder="etree", namespaceHTMLElements=False
-                    )
-                    html5tidy(
-                        doc,
-                        indent=True,
-                        title=padid,
-                        scripts=args.script,
-                        links=links,
-                    )
-                    with open(ver["path"], "w") as f:
-                        print(
-                            ET.tostring(doc, method="html", encoding="unicode"),
-                            file=f,
-                        )
-                except TypeError:
-                    # Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
-                    ver["message"] = html["message"]
-                    # with open(ver["path"], "w") as f:
-                    #    print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
-
-        # Process text, html, dhtml, all options
-        if args.all or args.html:
-            html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
-            ver = {"type": "html"}
-            versions.append(ver)
-            ver["code"] = html["_code"]
-            if html["_code"] == 200:
+    if args.all or args.dhtml:
+        data['startRev'] = "0"
+        html = getjson(
+            info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
+        )
+        ver = {"type": "diffhtml"}
+        versions.append(ver)
+        ver["code"] = html["_code"]
+        if html["_code"] == 200:
+            try:
                 html = html['data']['html']
-                ver["path"] = p + ".raw.html"
+                ver["path"] = p + ".diff.html"
                 ver["url"] = quote(ver["path"])
                 doc = html5lib.parse(
                     html, treebuilder="etree", namespaceHTMLElements=False
@@ -456,12 +394,58 @@ def main(args):
                     ET.tostring(doc, method="html", encoding="unicode"),
                     file=f,
                 )
+            except TypeError:
+                ver["message"] = html["message"]
+
+    # Process text, html, dhtml, all options
+    if args.all or args.html:
+        html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
+        ver = {"type": "html"}
+        versions.append(ver)
+        ver["code"] = html["_code"]
+        if html["_code"] == 200:
+            html = html['data']['html']
+            ver["path"] = p + ".raw.html"
+            ver["url"] = quote(ver["path"])
+            doc = html5lib.parse(
+                html, treebuilder="etree", namespaceHTMLElements=False
+            )
+            html5tidy(
+                doc, indent=True, title=padid, scripts=args.script, links=links,
+            )
+            with open(ver["path"], "w") as f:
+                print(
+                    ET.tostring(doc, method="html", encoding="unicode"), file=f,
+                )
+
+    # output meta
+    if args.all or args.meta:
+        ver = {"type": "meta"}
+        versions.append(ver)
+        ver["path"] = metapath
+        ver["url"] = quote(metapath)
+        with open(metapath, "w") as f:
+            json.dump(meta, f, indent=2)
+
+
+def main(args):
+    p = build_argument_parser(args)
+    args = p.parse_args(args)
+
+    raw_ext = ".raw.txt"
+    if args.no_raw_ext:
+        raw_ext = ""
+
+    info = loadpadinfo(args.padinfo)
+    data = {}
+    data['apikey'] = info['apikey']
+
+    padids = get_padids(args, info, data)
+
+    progress_kwargs = {}
+    if not istty():
+        progress_kwargs.update(dict(disable=True))
+    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
 
-        # output meta
-        if args.all or args.meta:
-            ver = {"type": "meta"}
-            versions.append(ver)
-            ver["path"] = metapath
-            ver["url"] = quote(metapath)
-            with open(metapath, "w") as f:
-                json.dump(meta, f, indent=2)
+    for index, padid in enumerate(progress_pads):
+        handle_pad(args, index, padid, data, info, raw_ext)