Browse Source

Try to solve padID bug and refactor WIP solutions

By passing data.copy() I hope to stop the overwriting. I have removed
the try/except work-arounds here because they no longer seem to be
necessary.
main
decentral1se 1 year ago
parent
commit
8227d75d28
No known key found for this signature in database GPG Key ID: 5E2EF5A63E3718CC
  1. 128
      etherpump/commands/pull.py

128
etherpump/commands/pull.py

@ -295,12 +295,6 @@ async def handle_pad(args, padid, data, info, session):
reason = "PANIC, couldn't download the pad contents"
break
# Note(decentral1se): cannot track this bug down but basically the `data`
# and `padid` are getting out of sync and it is ending up that the same pad
# over and over again is downloaded. This resets things in a way that it
# works. This is a hack and one day TM I will find out how to fix it proper
data["padID"] = padid
if skip:
print("[ ] {} (skipped, reason: {})".format(padid, reason))
skipped += 1
@ -466,93 +460,63 @@ async def handle_pad(args, padid, data, info, session):
ver["message"] = html["message"]
# Process text, html, dhtml, magicwords and all options
downloaded_html = False
if args.all or args.html:
# mb: line causing the error of not writing the correct HTML content to the correct HTML file:
# url = info["localapiurl"] + "getHTML?" + urlencode(data)
# mb: warning, HACK! Catching the error by writing the API request url manually ...
url = (
info["localapiurl"]
+ "getHTML?"
+ "padID="
+ padid
+ "&"
+ "apikey="
+ data["apikey"]
)
# print(url)
url = info["localapiurl"] + "getHTML?" + urlencode(data)
html = await agetjson(session, url)
ver = {"type": "html"}
versions.append(ver)
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
# try:
ver["code"] = html["_code"]
downloaded_html = True
if html["_code"] == 200:
try:
html = html["data"]["html"]
ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
async with await trio.open_file(ver["path"], "w") as f:
output = ET.tostring(doc, method="html", encoding="unicode")
await f.write(output)
except TypeError:
ver["message"] = html["message"]
# except Exception as exception:
# print("PANIC: {}".format(exception))
html_body = html["data"]["html"]
ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(
html_body, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
async with await trio.open_file(ver["path"], "w") as f:
output = ET.tostring(doc, method="html", encoding="unicode")
await f.write(output)
if args.all or args.magicwords:
url = (
info["localapiurl"]
+ "getHTML?"
+ "padID="
+ padid
+ "&"
+ "apikey="
+ data["apikey"]
)
# print(url)
html = await agetjson(session, url)
if not downloaded_html:
html = await agetjson(session, url)
ver = {"type": "magicwords"}
versions.append(ver)
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
# try:
ver["code"] = html["_code"]
if html["_code"] == 200:
try:
html = html["data"]["html"]
ver["path"] = p + ".magicwords.html"
ver["url"] = quote(ver["path"])
for magic_word in magic_words:
replace_word = (
"<span class='highlight'>" + magic_word + "</span>"
)
if magic_word in html:
html = html.replace(magic_word, replace_word)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
# INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!!
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
html_body = html["data"]["html"]
ver["path"] = p + ".magicwords.html"
ver["url"] = quote(ver["path"])
for magic_word in magic_words:
replace_word = (
"<span class='highlight'>" + magic_word + "</span>"
)
async with await trio.open_file(ver["path"], "w") as f:
output = ET.tostring(doc, method="html", encoding="unicode")
await f.write(output)
except TypeError:
ver["message"] = html["message"]
if magic_word in html_body:
html_body = html_body.replace(magic_word, replace_word)
doc = html5lib.parse(
html_body, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
async with await trio.open_file(ver["path"], "w") as f:
output = ET.tostring(doc, method="html", encoding="unicode")
await f.write(output)
# output meta
if args.all or args.meta:
@ -586,7 +550,9 @@ async def handle_pads(args):
start = time.time()
async with trio.open_nursery() as nursery:
for padid in padids:
nursery.start_soon(handle_pad, args, padid, data, info, session)
nursery.start_soon(
handle_pad, args, padid, data.copy(), info, session
)
end = time.time()
timeit = round(end - start, 2)

Loading…
Cancel
Save