Move pad pulling into own function

5 years ago · b82f39a42d
1 changed file with 229 additions and 245 deletions
--- a/etherpump/commands/pull.py
+++ b/etherpump/commands/pull.py
@@ -163,19 +163,7 @@ def build_argument_parser(args):
    return parser
-
+def get_padids(args, info, data):
 def main(args):
    p = build_argument_parser(args)
    args = p.parse_args(args)
    raw_ext = ".raw.txt"
    if args.no_raw_ext:
        raw_ext = ""
    info = loadpadinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']
    if args.padid:
        padids = args.padid
    elif args.glob:
@@ -188,258 +176,208 @@ def main(args):
            info['localapiurl'] + 'listAllPads?' + urlencode(data)
        )['data']['padIDs']
    padids.sort()
-    numpads = len(padids)
+    return padids
    # maxmsglen = 0
    count = 0
    progress_kwargs = {}
    if not istty():
        progress_kwargs.update(dict(disable=True))
    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
    for i, padid in enumerate(progress_pads):
        if args.skip != None and i < args.skip:
            continue
-        data['padID'] = padid
+def handle_pad(args, index, padid, data, info, raw_ext):
-        p = padpath(padid, args.pub, args.group, args.fix_names)
+    if args.skip != None and index < args.skip:
-        if args.folder:
+        return
            p = os.path.join(p, padid)
-        metapath = p + ".meta.json"
+    data['padID'] = padid
-        revisions = None
+    p = padpath(padid, args.pub, args.group, args.fix_names)
-        tries = 1
+    if args.folder:
-        skip = False
+        p = os.path.join(p, padid)
        padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
        meta = {}
-        while True:
+    metapath = p + ".meta.json"
-            try:
+    revisions = None
-                if os.path.exists(metapath):
+    tries = 1
-                    with open(metapath) as f:
+    skip = False
-                        meta.update(json.load(f))
+    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
-                    revisions = getjson(
+    meta = {}
                        info['localapiurl']
                        + 'getRevisionsCount?'
                        + urlencode(data)
                    )['data']['revisions']
                    if meta['revisions'] == revisions and not args.force:
                        skip = True
                        break
                meta['padid'] = padid
                versions = meta["versions"] = []
                versions.append(
                    {
                        "url": padurlbase + quote(padid),
                        "type": "pad",
                        "code": 200,
                    }
                )
                if revisions == None:
                    meta['revisions'] = getjson(
                        info['localapiurl']
                        + 'getRevisionsCount?'
                        + urlencode(data)
                    )['data']['revisions']
                else:
                    meta['revisions'] = revisions
-                if (meta['revisions'] == 0) and (not args.zerorevs):
+    while True:
        try:
            if os.path.exists(metapath):
                with open(metapath) as f:
                    meta.update(json.load(f))
                revisions = getjson(
                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
                )['data']['revisions']
                if meta['revisions'] == revisions and not args.force:
                    skip = True
                    break
-                # todo: load more metadata!
+            meta['padid'] = padid
-                meta['group'], meta['pad'] = splitpadname(padid)
+            versions = meta["versions"] = []
-                meta['pathbase'] = p
+            versions.append(
-                meta['lastedited_raw'] = int(
+                {"url": padurlbase + quote(padid), "type": "pad", "code": 200,}
-                    getjson(
+            )
-                        info['localapiurl'] + 'getLastEdited?' + urlencode(data)
+
-                    )['data']['lastEdited']
+            if revisions is None:
-                )
+                meta['revisions'] = getjson(
-                meta['lastedited_iso'] = datetime.fromtimestamp(
+                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
-                    int(meta['lastedited_raw']) / 1000
+                )['data']['revisions']
-                ).isoformat()
+            else:
-                meta['author_ids'] = getjson(
+                meta['revisions'] = revisions
-                    info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
+
-                )['data']['authorIDs']
+            if (meta['revisions'] == 0) and (not args.zerorevs):
                skip = True
                break
-            except HTTPError as e:
+
-                tries += 1
+            # todo: load more metadata!
-                if tries > 3:
+            meta['group'], meta['pad'] = splitpadname(padid)
-                    print(
+            meta['pathbase'] = p
-                        "Too many failures ({0}), skipping".format(padid),
+            meta['lastedited_raw'] = int(
-                        file=sys.stderr,
+                getjson(
-                    )
+                    info['localapiurl'] + 'getLastEdited?' + urlencode(data)
-                    skip = True
+                )['data']['lastEdited']
-                    break
+            )
-                else:
+            meta['lastedited_iso'] = datetime.fromtimestamp(
-                    sleep(3)
+                int(meta['lastedited_raw']) / 1000
-            except TypeError as e:
+            ).isoformat()
            meta['author_ids'] = getjson(
                info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
            )['data']['authorIDs']
            break
        except HTTPError as e:
            tries += 1
            if tries > 3:
                print(
-                    "Type Error loading pad {0} (phantom pad?), skipping".format(
+                    "Too many failures ({0}), skipping".format(padid),
                        padid
                    ),
                    file=sys.stderr,
                )
                skip = True
                break
            else:
                sleep(3)
        except TypeError as e:
            print(
                "Type Error loading pad {0} (phantom pad?), skipping".format(
                    padid
                ),
                file=sys.stderr,
            )
            skip = True
            break
-        if skip:
+    if skip:
-            continue
+        return
-        count += 1
+    if args.output:
        print(padid)
-        if args.output:
+    if args.all or (args.meta or args.text or args.html or args.dhtml):
-            print(padid)
+        try:
            os.makedirs(os.path.split(metapath)[0])
        except OSError:
            pass
-        if args.all or (args.meta or args.text or args.html or args.dhtml):
+    if args.all or args.text:
-            try:
+        text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
-                os.makedirs(os.path.split(metapath)[0])
+        ver = {"type": "text"}
-            except OSError:
+        versions.append(ver)
-                pass
+        ver["code"] = text["_code"]
-
+        if text["_code"] == 200:
-        if args.all or args.text:
+            text = text['data']['text']
-            text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
+
-            ver = {"type": "text"}
+            ##########################################
-            versions.append(ver)
+            ## ENFORCE __NOPUBLISH__ MAGIC WORD
-            ver["code"] = text["_code"]
+            ##########################################
-            if text["_code"] == 200:
+            if args.nopublish and args.nopublish in text:
-                text = text['data']['text']
+                # NEED TO PURGE ANY EXISTING DOCS
-
+                try_deleting(
-                ##########################################
+                    (
-                ## ENFORCE __NOPUBLISH__ MAGIC WORD
+                        p + raw_ext,
-                ##########################################
+                        p + ".raw.html",
-                if args.nopublish and args.nopublish in text:
+                        p + ".diff.html",
-                    # NEED TO PURGE ANY EXISTING DOCS
+                        p + ".meta.json",
                    try_deleting(
                        (
                            p + raw_ext,
                            p + ".raw.html",
                            p + ".diff.html",
                            p + ".meta.json",
                        )
                    )
-                    continue
+                )
-
+                return
-                ##########################################
+
-                ## ENFORCE __PUBLISH__ MAGIC WORD
+            ##########################################
-                ##########################################
+            ## ENFORCE __PUBLISH__ MAGIC WORD
-                if args.publish_opt_in and args.publish not in text:
+            ##########################################
-                    try_deleting(
+            if args.publish_opt_in and args.publish not in text:
-                        (
+                try_deleting(
-                            p + raw_ext,
+                    (
-                            p + ".raw.html",
+                        p + raw_ext,
-                            p + ".diff.html",
+                        p + ".raw.html",
-                            p + ".meta.json",
+                        p + ".diff.html",
-                        )
+                        p + ".meta.json",
                    )
-                    continue
+                )
-
+                return
-                ver["path"] = p + raw_ext
+
-                ver["url"] = quote(ver["path"])
+            ver["path"] = p + raw_ext
-                with open(ver["path"], "w") as f:
+            ver["url"] = quote(ver["path"])
-                    f.write(text)
+            with open(ver["path"], "w") as f:
-                # once the content is settled, compute a hash
+                f.write(text)
-                # and link it in the metadata!
+            # once the content is settled, compute a hash
-
+            # and link it in the metadata!
-        links = []
+
-        if args.css:
+    links = []
-            links.append({"href": args.css, "rel": "stylesheet"})
+    if args.css:
-        # todo, make this process reflect which files actually were made
+        links.append({"href": args.css, "rel": "stylesheet"})
-        versionbaseurl = quote(padid)
+    # todo, make this process reflect which files actually were made
    versionbaseurl = quote(padid)
    links.append(
        {
            "href": versions[0]["url"],
            "rel": "alternate",
            "type": "text/html",
            "title": "Etherpad",
        }
    )
    if args.all or args.text:
        links.append(
            {
                "href": versionbaseurl + raw_ext,
                "rel": "alternate",
                "type": "text/plain",
                "title": "Plain text",
            }
        )
    if args.all or args.html:
        links.append(
            {
-                "href": versions[0]["url"],
+                "href": versionbaseurl + ".raw.html",
                "rel": "alternate",
                "type": "text/html",
-                "title": "Etherpad",
+                "title": "HTML",
            }
        )
    if args.all or args.dhtml:
        links.append(
            {
                "href": versionbaseurl + ".diff.html",
                "rel": "alternate",
                "type": "text/html",
                "title": "HTML with author colors",
            }
        )
    if args.all or args.meta:
        links.append(
            {
                "href": versionbaseurl + ".meta.json",
                "rel": "alternate",
                "type": "application/json",
                "title": "Meta data",
            }
        )
        if args.all or args.text:
            links.append(
                {
                    "href": versionbaseurl + raw_ext,
                    "rel": "alternate",
                    "type": "text/plain",
                    "title": "Plain text",
                }
            )
        if args.all or args.html:
            links.append(
                {
                    "href": versionbaseurl + ".raw.html",
                    "rel": "alternate",
                    "type": "text/html",
                    "title": "HTML",
                }
            )
        if args.all or args.dhtml:
            links.append(
                {
                    "href": versionbaseurl + ".diff.html",
                    "rel": "alternate",
                    "type": "text/html",
                    "title": "HTML with author colors",
                }
            )
        if args.all or args.meta:
            links.append(
                {
                    "href": versionbaseurl + ".meta.json",
                    "rel": "alternate",
                    "type": "application/json",
                    "title": "Meta data",
                }
            )
        # links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
-        if args.all or args.dhtml:
+    if args.all or args.dhtml:
-            data['startRev'] = "0"
+        data['startRev'] = "0"
-            html = getjson(
+        html = getjson(
-                info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
+            info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
-            )
+        )
-            ver = {"type": "diffhtml"}
+        ver = {"type": "diffhtml"}
-            versions.append(ver)
+        versions.append(ver)
-            ver["code"] = html["_code"]
+        ver["code"] = html["_code"]
-            if html["_code"] == 200:
+        if html["_code"] == 200:
-                try:
+            try:
                    html = html['data']['html']
                    ver["path"] = p + ".diff.html"
                    ver["url"] = quote(ver["path"])
                    # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
                    doc = html5lib.parse(
                        html, treebuilder="etree", namespaceHTMLElements=False
                    )
                    html5tidy(
                        doc,
                        indent=True,
                        title=padid,
                        scripts=args.script,
                        links=links,
                    )
                    with open(ver["path"], "w") as f:
                        print(
                            ET.tostring(doc, method="html", encoding="unicode"),
                            file=f,
                        )
                except TypeError:
                    # Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
                    ver["message"] = html["message"]
                    # with open(ver["path"], "w") as f:
                    #     print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
        # Process text, html, dhtml, all options
        if args.all or args.html:
            html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
            ver = {"type": "html"}
            versions.append(ver)
            ver["code"] = html["_code"]
            if html["_code"] == 200:
                html = html['data']['html']
-                ver["path"] = p + ".raw.html"
+                ver["path"] = p + ".diff.html"
                ver["url"] = quote(ver["path"])
                doc = html5lib.parse(
                    html, treebuilder="etree", namespaceHTMLElements=False
@@ -456,12 +394,58 @@ def main(args):
                        ET.tostring(doc, method="html", encoding="unicode"),
                        file=f,
                    )
            except TypeError:
                ver["message"] = html["message"]
    # Process text, html, dhtml, all options
    if args.all or args.html:
        html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
        ver = {"type": "html"}
        versions.append(ver)
        ver["code"] = html["_code"]
        if html["_code"] == 200:
            html = html['data']['html']
            ver["path"] = p + ".raw.html"
            ver["url"] = quote(ver["path"])
            doc = html5lib.parse(
                html, treebuilder="etree", namespaceHTMLElements=False
            )
            html5tidy(
                doc, indent=True, title=padid, scripts=args.script, links=links,
            )
            with open(ver["path"], "w") as f:
                print(
                    ET.tostring(doc, method="html", encoding="unicode"), file=f,
                )
    # output meta
    if args.all or args.meta:
        ver = {"type": "meta"}
        versions.append(ver)
        ver["path"] = metapath
        ver["url"] = quote(metapath)
        with open(metapath, "w") as f:
            json.dump(meta, f, indent=2)
 def main(args):
    p = build_argument_parser(args)
    args = p.parse_args(args)
    raw_ext = ".raw.txt"
    if args.no_raw_ext:
        raw_ext = ""
    info = loadpadinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']
    padids = get_padids(args, info, data)
    progress_kwargs = {}
    if not istty():
        progress_kwargs.update(dict(disable=True))
    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
-        # output meta
+    for index, padid in enumerate(progress_pads):
-        if args.all or args.meta:
+        handle_pad(args, index, padid, data, info, raw_ext)
            ver = {"type": "meta"}
            versions.append(ver)
            ver["path"] = metapath
            ver["url"] = quote(metapath)
            with open(metapath, "w") as f:
                json.dump(meta, f, indent=2)