Try to solve padID bug and refactor WIP solutions

By passing data.copy() to each handle_pad task, I hope to stop the tasks
from overwriting each other's `data`. I have removed the try/except
work-arounds here because they no longer seem to be necessary.
This commit is contained in:
Luke Murphy 2020-12-12 16:29:38 +01:00
parent 6bed5493ef
commit 8227d75d28
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC

View File

@ -295,12 +295,6 @@ async def handle_pad(args, padid, data, info, session):
reason = "PANIC, couldn't download the pad contents" reason = "PANIC, couldn't download the pad contents"
break break
# Note(decentral1se): cannot track this bug down but basically the `data`
# and `padid` are getting out of sync and it is ending up that the same pad
# over and over again is downloaded. This resets things in a way that it
# works. This is a hack and one day TM I will find out how to fix it proper
data["padID"] = padid
if skip: if skip:
print("[ ] {} (skipped, reason: {})".format(padid, reason)) print("[ ] {} (skipped, reason: {})".format(padid, reason))
skipped += 1 skipped += 1
@ -466,93 +460,63 @@ async def handle_pad(args, padid, data, info, session):
ver["message"] = html["message"] ver["message"] = html["message"]
# Process text, html, dhtml, magicwords and all options # Process text, html, dhtml, magicwords and all options
downloaded_html = False
if args.all or args.html: if args.all or args.html:
# mb: line causing the error of not writing the correct HTML content to the correct HTML file: url = info["localapiurl"] + "getHTML?" + urlencode(data)
# url = info["localapiurl"] + "getHTML?" + urlencode(data)
# mb: warning, HACK! Catching the error by writing the API request url manually ...
url = (
info["localapiurl"]
+ "getHTML?"
+ "padID="
+ padid
+ "&"
+ "apikey="
+ data["apikey"]
)
# print(url)
html = await agetjson(session, url) html = await agetjson(session, url)
ver = {"type": "html"} ver = {"type": "html"}
versions.append(ver) versions.append(ver)
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
# try:
ver["code"] = html["_code"] ver["code"] = html["_code"]
downloaded_html = True
if html["_code"] == 200: if html["_code"] == 200:
try: html_body = html["data"]["html"]
html = html["data"]["html"] ver["path"] = p + ".raw.html"
ver["path"] = p + ".raw.html" ver["url"] = quote(ver["path"])
ver["url"] = quote(ver["path"]) doc = html5lib.parse(
doc = html5lib.parse( html_body, treebuilder="etree", namespaceHTMLElements=False
html, treebuilder="etree", namespaceHTMLElements=False )
) html5tidy(
html5tidy( doc,
doc, indent=True,
indent=True, title=padid,
title=padid, scripts=args.script,
scripts=args.script, links=links,
links=links, )
) async with await trio.open_file(ver["path"], "w") as f:
async with await trio.open_file(ver["path"], "w") as f: output = ET.tostring(doc, method="html", encoding="unicode")
output = ET.tostring(doc, method="html", encoding="unicode") await f.write(output)
await f.write(output)
except TypeError:
ver["message"] = html["message"]
# except Exception as exception:
# print("PANIC: {}".format(exception))
if args.all or args.magicwords: if args.all or args.magicwords:
url = ( if not downloaded_html:
info["localapiurl"] html = await agetjson(session, url)
+ "getHTML?"
+ "padID="
+ padid
+ "&"
+ "apikey="
+ data["apikey"]
)
# print(url)
html = await agetjson(session, url)
ver = {"type": "magicwords"} ver = {"type": "magicwords"}
versions.append(ver) versions.append(ver)
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
# try:
ver["code"] = html["_code"] ver["code"] = html["_code"]
if html["_code"] == 200: if html["_code"] == 200:
try: html_body = html["data"]["html"]
html = html["data"]["html"] ver["path"] = p + ".magicwords.html"
ver["path"] = p + ".magicwords.html" ver["url"] = quote(ver["path"])
ver["url"] = quote(ver["path"]) for magic_word in magic_words:
for magic_word in magic_words: replace_word = (
replace_word = ( "<span class='highlight'>" + magic_word + "</span>"
"<span class='highlight'>" + magic_word + "</span>"
)
if magic_word in html:
html = html.replace(magic_word, replace_word)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
) )
# INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!! if magic_word in html_body:
html5tidy( html_body = html_body.replace(magic_word, replace_word)
doc, doc = html5lib.parse(
indent=True, html_body, treebuilder="etree", namespaceHTMLElements=False
title=padid, )
scripts=args.script, html5tidy(
links=links, doc,
) indent=True,
async with await trio.open_file(ver["path"], "w") as f: title=padid,
output = ET.tostring(doc, method="html", encoding="unicode") scripts=args.script,
await f.write(output) links=links,
except TypeError: )
ver["message"] = html["message"] async with await trio.open_file(ver["path"], "w") as f:
output = ET.tostring(doc, method="html", encoding="unicode")
await f.write(output)
# output meta # output meta
if args.all or args.meta: if args.all or args.meta:
@ -586,7 +550,9 @@ async def handle_pads(args):
start = time.time() start = time.time()
async with trio.open_nursery() as nursery: async with trio.open_nursery() as nursery:
for padid in padids: for padid in padids:
nursery.start_soon(handle_pad, args, padid, data, info, session) nursery.start_soon(
handle_pad, args, padid, data.copy(), info, session
)
end = time.time() end = time.time()
timeit = round(end - start, 2) timeit = round(end - start, 2)