Move pad pulling into own function

2020-01-19 00:03:00 +01:00 · 2020-01-19 00:03:00 +01:00 · b82f39a42d
commit b82f39a42d
parent 97bcca145b
1 changed files with 226 additions and 242 deletions
--- a/etherpump/commands/pull.py
+++ b/etherpump/commands/pull.py
@ -163,19 +163,7 @@ def build_argument_parser(args):
    return parser


-
-def main(args):
-    p = build_argument_parser(args)
-    args = p.parse_args(args)
-
-    raw_ext = ".raw.txt"
-    if args.no_raw_ext:
-        raw_ext = ""
-
-    info = loadpadinfo(args.padinfo)
-    data = {}
-    data['apikey'] = info['apikey']
-
+def get_padids(args, info, data):
    if args.padid:
        padids = args.padid
    elif args.glob:
@ -188,258 +176,208 @@ def main(args):
            info['localapiurl'] + 'listAllPads?' + urlencode(data)
        )['data']['padIDs']
    padids.sort()
-    numpads = len(padids)
-    # maxmsglen = 0
-    count = 0
+    return padids

-    progress_kwargs = {}
-    if not istty():
-        progress_kwargs.update(dict(disable=True))
-    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)

-    for i, padid in enumerate(progress_pads):
-        if args.skip != None and i < args.skip:
-            continue
+def handle_pad(args, index, padid, data, info, raw_ext):
+    if args.skip != None and index < args.skip:
+        return

-        data['padID'] = padid
-        p = padpath(padid, args.pub, args.group, args.fix_names)
-        if args.folder:
-            p = os.path.join(p, padid)
+    data['padID'] = padid
+    p = padpath(padid, args.pub, args.group, args.fix_names)
+    if args.folder:
+        p = os.path.join(p, padid)

-        metapath = p + ".meta.json"
-        revisions = None
-        tries = 1
-        skip = False
-        padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
-        meta = {}
+    metapath = p + ".meta.json"
+    revisions = None
+    tries = 1
+    skip = False
+    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
+    meta = {}

-        while True:
-            try:
-                if os.path.exists(metapath):
-                    with open(metapath) as f:
-                        meta.update(json.load(f))
-                    revisions = getjson(
-                        info['localapiurl']
-                        + 'getRevisionsCount?'
-                        + urlencode(data)
-                    )['data']['revisions']
-                    if meta['revisions'] == revisions and not args.force:
-                        skip = True
-                        break
-
-                meta['padid'] = padid
-                versions = meta["versions"] = []
-                versions.append(
-                    {
-                        "url": padurlbase + quote(padid),
-                        "type": "pad",
-                        "code": 200,
-                    }
-                )
-
-                if revisions == None:
-                    meta['revisions'] = getjson(
-                        info['localapiurl']
-                        + 'getRevisionsCount?'
-                        + urlencode(data)
-                    )['data']['revisions']
-                else:
-                    meta['revisions'] = revisions
-
-                if (meta['revisions'] == 0) and (not args.zerorevs):
+    while True:
+        try:
+            if os.path.exists(metapath):
+                with open(metapath) as f:
+                    meta.update(json.load(f))
+                revisions = getjson(
+                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
+                )['data']['revisions']
+                if meta['revisions'] == revisions and not args.force:
                    skip = True
                    break

-                # todo: load more metadata!
-                meta['group'], meta['pad'] = splitpadname(padid)
-                meta['pathbase'] = p
-                meta['lastedited_raw'] = int(
-                    getjson(
-                        info['localapiurl'] + 'getLastEdited?' + urlencode(data)
-                    )['data']['lastEdited']
-                )
-                meta['lastedited_iso'] = datetime.fromtimestamp(
-                    int(meta['lastedited_raw']) / 1000
-                ).isoformat()
-                meta['author_ids'] = getjson(
-                    info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
-                )['data']['authorIDs']
+            meta['padid'] = padid
+            versions = meta["versions"] = []
+            versions.append(
+                {"url": padurlbase + quote(padid), "type": "pad", "code": 200,}
+            )
+
+            if revisions is None:
+                meta['revisions'] = getjson(
+                    info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
+                )['data']['revisions']
+            else:
+                meta['revisions'] = revisions
+
+            if (meta['revisions'] == 0) and (not args.zerorevs):
+                skip = True
                break
-            except HTTPError as e:
-                tries += 1
-                if tries > 3:
-                    print(
-                        "Too many failures ({0}), skipping".format(padid),
-                        file=sys.stderr,
-                    )
-                    skip = True
-                    break
-                else:
-                    sleep(3)
-            except TypeError as e:
+
+            # todo: load more metadata!
+            meta['group'], meta['pad'] = splitpadname(padid)
+            meta['pathbase'] = p
+            meta['lastedited_raw'] = int(
+                getjson(
+                    info['localapiurl'] + 'getLastEdited?' + urlencode(data)
+                )['data']['lastEdited']
+            )
+            meta['lastedited_iso'] = datetime.fromtimestamp(
+                int(meta['lastedited_raw']) / 1000
+            ).isoformat()
+            meta['author_ids'] = getjson(
+                info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
+            )['data']['authorIDs']
+            break
+        except HTTPError as e:
+            tries += 1
+            if tries > 3:
                print(
-                    "Type Error loading pad {0} (phantom pad?), skipping".format(
-                        padid
-                    ),
+                    "Too many failures ({0}), skipping".format(padid),
                    file=sys.stderr,
                )
                skip = True
                break
+            else:
+                sleep(3)
+        except TypeError as e:
+            print(
+                "Type Error loading pad {0} (phantom pad?), skipping".format(
+                    padid
+                ),
+                file=sys.stderr,
+            )
+            skip = True
+            break

-        if skip:
-            continue
+    if skip:
+        return

-        count += 1
+    if args.output:
+        print(padid)

-        if args.output:
-            print(padid)
+    if args.all or (args.meta or args.text or args.html or args.dhtml):
+        try:
+            os.makedirs(os.path.split(metapath)[0])
+        except OSError:
+            pass

-        if args.all or (args.meta or args.text or args.html or args.dhtml):
-            try:
-                os.makedirs(os.path.split(metapath)[0])
-            except OSError:
-                pass
+    if args.all or args.text:
+        text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
+        ver = {"type": "text"}
+        versions.append(ver)
+        ver["code"] = text["_code"]
+        if text["_code"] == 200:
+            text = text['data']['text']

-        if args.all or args.text:
-            text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
-            ver = {"type": "text"}
-            versions.append(ver)
-            ver["code"] = text["_code"]
-            if text["_code"] == 200:
-                text = text['data']['text']
-
-                ##########################################
-                ## ENFORCE __NOPUBLISH__ MAGIC WORD
-                ##########################################
-                if args.nopublish and args.nopublish in text:
-                    # NEED TO PURGE ANY EXISTING DOCS
-                    try_deleting(
-                        (
-                            p + raw_ext,
-                            p + ".raw.html",
-                            p + ".diff.html",
-                            p + ".meta.json",
-                        )
+            ##########################################
+            ## ENFORCE __NOPUBLISH__ MAGIC WORD
+            ##########################################
+            if args.nopublish and args.nopublish in text:
+                # NEED TO PURGE ANY EXISTING DOCS
+                try_deleting(
+                    (
+                        p + raw_ext,
+                        p + ".raw.html",
+                        p + ".diff.html",
+                        p + ".meta.json",
                    )
-                    continue
+                )
+                return

-                ##########################################
-                ## ENFORCE __PUBLISH__ MAGIC WORD
-                ##########################################
-                if args.publish_opt_in and args.publish not in text:
-                    try_deleting(
-                        (
-                            p + raw_ext,
-                            p + ".raw.html",
-                            p + ".diff.html",
-                            p + ".meta.json",
-                        )
+            ##########################################
+            ## ENFORCE __PUBLISH__ MAGIC WORD
+            ##########################################
+            if args.publish_opt_in and args.publish not in text:
+                try_deleting(
+                    (
+                        p + raw_ext,
+                        p + ".raw.html",
+                        p + ".diff.html",
+                        p + ".meta.json",
                    )
-                    continue
+                )
+                return

-                ver["path"] = p + raw_ext
-                ver["url"] = quote(ver["path"])
-                with open(ver["path"], "w") as f:
-                    f.write(text)
-                # once the content is settled, compute a hash
-                # and link it in the metadata!
+            ver["path"] = p + raw_ext
+            ver["url"] = quote(ver["path"])
+            with open(ver["path"], "w") as f:
+                f.write(text)
+            # once the content is settled, compute a hash
+            # and link it in the metadata!

-        links = []
-        if args.css:
-            links.append({"href": args.css, "rel": "stylesheet"})
-        # todo, make this process reflect which files actually were made
-        versionbaseurl = quote(padid)
+    links = []
+    if args.css:
+        links.append({"href": args.css, "rel": "stylesheet"})
+    # todo, make this process reflect which files actually were made
+    versionbaseurl = quote(padid)
+    links.append(
+        {
+            "href": versions[0]["url"],
+            "rel": "alternate",
+            "type": "text/html",
+            "title": "Etherpad",
+        }
+    )
+    if args.all or args.text:
        links.append(
            {
-                "href": versions[0]["url"],
+                "href": versionbaseurl + raw_ext,
                "rel": "alternate",
-                "type": "text/html",
-                "title": "Etherpad",
+                "type": "text/plain",
+                "title": "Plain text",
+            }
+        )
+    if args.all or args.html:
+        links.append(
+            {
+                "href": versionbaseurl + ".raw.html",
+                "rel": "alternate",
+                "type": "text/html",
+                "title": "HTML",
+            }
+        )
+    if args.all or args.dhtml:
+        links.append(
+            {
+                "href": versionbaseurl + ".diff.html",
+                "rel": "alternate",
+                "type": "text/html",
+                "title": "HTML with author colors",
+            }
+        )
+    if args.all or args.meta:
+        links.append(
+            {
+                "href": versionbaseurl + ".meta.json",
+                "rel": "alternate",
+                "type": "application/json",
+                "title": "Meta data",
            }
        )
-        if args.all or args.text:
-            links.append(
-                {
-                    "href": versionbaseurl + raw_ext,
-                    "rel": "alternate",
-                    "type": "text/plain",
-                    "title": "Plain text",
-                }
-            )
-        if args.all or args.html:
-            links.append(
-                {
-                    "href": versionbaseurl + ".raw.html",
-                    "rel": "alternate",
-                    "type": "text/html",
-                    "title": "HTML",
-                }
-            )
-        if args.all or args.dhtml:
-            links.append(
-                {
-                    "href": versionbaseurl + ".diff.html",
-                    "rel": "alternate",
-                    "type": "text/html",
-                    "title": "HTML with author colors",
-                }
-            )
-        if args.all or args.meta:
-            links.append(
-                {
-                    "href": versionbaseurl + ".meta.json",
-                    "rel": "alternate",
-                    "type": "application/json",
-                    "title": "Meta data",
-                }
-            )

-        # links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
-
-        if args.all or args.dhtml:
-            data['startRev'] = "0"
-            html = getjson(
-                info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
-            )
-            ver = {"type": "diffhtml"}
-            versions.append(ver)
-            ver["code"] = html["_code"]
-            if html["_code"] == 200:
-                try:
-                    html = html['data']['html']
-                    ver["path"] = p + ".diff.html"
-                    ver["url"] = quote(ver["path"])
-                    # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
-                    doc = html5lib.parse(
-                        html, treebuilder="etree", namespaceHTMLElements=False
-                    )
-                    html5tidy(
-                        doc,
-                        indent=True,
-                        title=padid,
-                        scripts=args.script,
-                        links=links,
-                    )
-                    with open(ver["path"], "w") as f:
-                        print(
-                            ET.tostring(doc, method="html", encoding="unicode"),
-                            file=f,
-                        )
-                except TypeError:
-                    # Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
-                    ver["message"] = html["message"]
-                    # with open(ver["path"], "w") as f:
-                    #     print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
-
-        # Process text, html, dhtml, all options
-        if args.all or args.html:
-            html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
-            ver = {"type": "html"}
-            versions.append(ver)
-            ver["code"] = html["_code"]
-            if html["_code"] == 200:
+    if args.all or args.dhtml:
+        data['startRev'] = "0"
+        html = getjson(
+            info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
+        )
+        ver = {"type": "diffhtml"}
+        versions.append(ver)
+        ver["code"] = html["_code"]
+        if html["_code"] == 200:
+            try:
                html = html['data']['html']
-                ver["path"] = p + ".raw.html"
+                ver["path"] = p + ".diff.html"
                ver["url"] = quote(ver["path"])
                doc = html5lib.parse(
                    html, treebuilder="etree", namespaceHTMLElements=False
@ -456,12 +394,58 @@ def main(args):
                        ET.tostring(doc, method="html", encoding="unicode"),
                        file=f,
                    )
+            except TypeError:
+                ver["message"] = html["message"]

-        # output meta
-        if args.all or args.meta:
-            ver = {"type": "meta"}
-            versions.append(ver)
-            ver["path"] = metapath
-            ver["url"] = quote(metapath)
-            with open(metapath, "w") as f:
-                json.dump(meta, f, indent=2)
+    # Process text, html, dhtml, all options
+    if args.all or args.html:
+        html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
+        ver = {"type": "html"}
+        versions.append(ver)
+        ver["code"] = html["_code"]
+        if html["_code"] == 200:
+            html = html['data']['html']
+            ver["path"] = p + ".raw.html"
+            ver["url"] = quote(ver["path"])
+            doc = html5lib.parse(
+                html, treebuilder="etree", namespaceHTMLElements=False
+            )
+            html5tidy(
+                doc, indent=True, title=padid, scripts=args.script, links=links,
+            )
+            with open(ver["path"], "w") as f:
+                print(
+                    ET.tostring(doc, method="html", encoding="unicode"), file=f,
+                )
+
+    # output meta
+    if args.all or args.meta:
+        ver = {"type": "meta"}
+        versions.append(ver)
+        ver["path"] = metapath
+        ver["url"] = quote(metapath)
+        with open(metapath, "w") as f:
+            json.dump(meta, f, indent=2)
+
+
+def main(args):
+    """Parse the CLI arguments and pull every selected pad in turn."""
+    parser = build_argument_parser(args)
+    args = parser.parse_args(args)
+
+    # args.no_raw_ext drops the ".raw.txt" suffix used for the text file
+    raw_ext = "" if args.no_raw_ext else ".raw.txt"
+
+    info = loadpadinfo(args.padinfo)
+    # Query parameters that get url-encoded into the API requests
+    data = {'apikey': info['apikey']}
+
+    padids = get_padids(args, info, data)
+
+    # Pass disable=True to tqdm only when istty() reports no terminal
+    progress_kwargs = {} if istty() else {"disable": True}
+    progress_pads = tqdm(
+        iterable=padids, total=len(padids), **progress_kwargs
+    )
+    for index, padid in enumerate(progress_pads):
+        handle_pad(args, index, padid, data, info, raw_ext)