Try to solve padID bug and refactor WIP solutions
By passing data.copy() to each handle_pad task, I hope to stop the tasks from overwriting each other's padID in the shared data dict. I have removed the try/except work-arounds here because they no longer seem to be necessary.
This commit is contained in:
parent
6bed5493ef
commit
8227d75d28
@ -295,12 +295,6 @@ async def handle_pad(args, padid, data, info, session):
|
|||||||
reason = "PANIC, couldn't download the pad contents"
|
reason = "PANIC, couldn't download the pad contents"
|
||||||
break
|
break
|
||||||
|
|
||||||
# Note(decentral1se): cannot track this bug down but basically the `data`
|
|
||||||
# and `padid` are getting out of sync and it is ending up that the same pad
|
|
||||||
# over and over again is downloaded. This resets things in a way that it
|
|
||||||
# works. This is a hack and one day TM I will find out how to fix it proper
|
|
||||||
data["padID"] = padid
|
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
print("[ ] {} (skipped, reason: {})".format(padid, reason))
|
print("[ ] {} (skipped, reason: {})".format(padid, reason))
|
||||||
skipped += 1
|
skipped += 1
|
||||||
@ -466,93 +460,63 @@ async def handle_pad(args, padid, data, info, session):
|
|||||||
ver["message"] = html["message"]
|
ver["message"] = html["message"]
|
||||||
|
|
||||||
# Process text, html, dhtml, magicwords and all options
|
# Process text, html, dhtml, magicwords and all options
|
||||||
|
downloaded_html = False
|
||||||
if args.all or args.html:
|
if args.all or args.html:
|
||||||
# mb: line causing the error of not writing the correct HTML content to the correct HTML file:
|
url = info["localapiurl"] + "getHTML?" + urlencode(data)
|
||||||
# url = info["localapiurl"] + "getHTML?" + urlencode(data)
|
|
||||||
# mb: warning, HACK! Catching the error by writing the API request url manually ...
|
|
||||||
url = (
|
|
||||||
info["localapiurl"]
|
|
||||||
+ "getHTML?"
|
|
||||||
+ "padID="
|
|
||||||
+ padid
|
|
||||||
+ "&"
|
|
||||||
+ "apikey="
|
|
||||||
+ data["apikey"]
|
|
||||||
)
|
|
||||||
# print(url)
|
|
||||||
html = await agetjson(session, url)
|
html = await agetjson(session, url)
|
||||||
ver = {"type": "html"}
|
ver = {"type": "html"}
|
||||||
versions.append(ver)
|
versions.append(ver)
|
||||||
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
|
|
||||||
# try:
|
|
||||||
ver["code"] = html["_code"]
|
ver["code"] = html["_code"]
|
||||||
|
downloaded_html = True
|
||||||
|
|
||||||
if html["_code"] == 200:
|
if html["_code"] == 200:
|
||||||
try:
|
html_body = html["data"]["html"]
|
||||||
html = html["data"]["html"]
|
ver["path"] = p + ".raw.html"
|
||||||
ver["path"] = p + ".raw.html"
|
ver["url"] = quote(ver["path"])
|
||||||
ver["url"] = quote(ver["path"])
|
doc = html5lib.parse(
|
||||||
doc = html5lib.parse(
|
html_body, treebuilder="etree", namespaceHTMLElements=False
|
||||||
html, treebuilder="etree", namespaceHTMLElements=False
|
)
|
||||||
)
|
html5tidy(
|
||||||
html5tidy(
|
doc,
|
||||||
doc,
|
indent=True,
|
||||||
indent=True,
|
title=padid,
|
||||||
title=padid,
|
scripts=args.script,
|
||||||
scripts=args.script,
|
links=links,
|
||||||
links=links,
|
)
|
||||||
)
|
async with await trio.open_file(ver["path"], "w") as f:
|
||||||
async with await trio.open_file(ver["path"], "w") as f:
|
output = ET.tostring(doc, method="html", encoding="unicode")
|
||||||
output = ET.tostring(doc, method="html", encoding="unicode")
|
await f.write(output)
|
||||||
await f.write(output)
|
|
||||||
except TypeError:
|
|
||||||
ver["message"] = html["message"]
|
|
||||||
# except Exception as exception:
|
|
||||||
# print("PANIC: {}".format(exception))
|
|
||||||
|
|
||||||
if args.all or args.magicwords:
|
if args.all or args.magicwords:
|
||||||
url = (
|
if not downloaded_html:
|
||||||
info["localapiurl"]
|
html = await agetjson(session, url)
|
||||||
+ "getHTML?"
|
|
||||||
+ "padID="
|
|
||||||
+ padid
|
|
||||||
+ "&"
|
|
||||||
+ "apikey="
|
|
||||||
+ data["apikey"]
|
|
||||||
)
|
|
||||||
# print(url)
|
|
||||||
html = await agetjson(session, url)
|
|
||||||
ver = {"type": "magicwords"}
|
ver = {"type": "magicwords"}
|
||||||
versions.append(ver)
|
versions.append(ver)
|
||||||
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
|
|
||||||
# try:
|
|
||||||
ver["code"] = html["_code"]
|
ver["code"] = html["_code"]
|
||||||
|
|
||||||
if html["_code"] == 200:
|
if html["_code"] == 200:
|
||||||
try:
|
html_body = html["data"]["html"]
|
||||||
html = html["data"]["html"]
|
ver["path"] = p + ".magicwords.html"
|
||||||
ver["path"] = p + ".magicwords.html"
|
ver["url"] = quote(ver["path"])
|
||||||
ver["url"] = quote(ver["path"])
|
for magic_word in magic_words:
|
||||||
for magic_word in magic_words:
|
replace_word = (
|
||||||
replace_word = (
|
"<span class='highlight'>" + magic_word + "</span>"
|
||||||
"<span class='highlight'>" + magic_word + "</span>"
|
|
||||||
)
|
|
||||||
if magic_word in html:
|
|
||||||
html = html.replace(magic_word, replace_word)
|
|
||||||
doc = html5lib.parse(
|
|
||||||
html, treebuilder="etree", namespaceHTMLElements=False
|
|
||||||
)
|
)
|
||||||
# INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!!
|
if magic_word in html_body:
|
||||||
html5tidy(
|
html_body = html_body.replace(magic_word, replace_word)
|
||||||
doc,
|
doc = html5lib.parse(
|
||||||
indent=True,
|
html_body, treebuilder="etree", namespaceHTMLElements=False
|
||||||
title=padid,
|
)
|
||||||
scripts=args.script,
|
html5tidy(
|
||||||
links=links,
|
doc,
|
||||||
)
|
indent=True,
|
||||||
async with await trio.open_file(ver["path"], "w") as f:
|
title=padid,
|
||||||
output = ET.tostring(doc, method="html", encoding="unicode")
|
scripts=args.script,
|
||||||
await f.write(output)
|
links=links,
|
||||||
except TypeError:
|
)
|
||||||
ver["message"] = html["message"]
|
async with await trio.open_file(ver["path"], "w") as f:
|
||||||
|
output = ET.tostring(doc, method="html", encoding="unicode")
|
||||||
|
await f.write(output)
|
||||||
|
|
||||||
# output meta
|
# output meta
|
||||||
if args.all or args.meta:
|
if args.all or args.meta:
|
||||||
@ -586,7 +550,9 @@ async def handle_pads(args):
|
|||||||
start = time.time()
|
start = time.time()
|
||||||
async with trio.open_nursery() as nursery:
|
async with trio.open_nursery() as nursery:
|
||||||
for padid in padids:
|
for padid in padids:
|
||||||
nursery.start_soon(handle_pad, args, padid, data, info, session)
|
nursery.start_soon(
|
||||||
|
handle_pad, args, padid, data.copy(), info, session
|
||||||
|
)
|
||||||
end = time.time()
|
end = time.time()
|
||||||
timeit = round(end - start, 2)
|
timeit = round(end - start, 2)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user