|
|
@ -295,12 +295,6 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
reason = "PANIC, couldn't download the pad contents" |
|
|
|
break |
|
|
|
|
|
|
|
# Note(decentral1se): cannot track this bug down but basically the `data` |
|
|
|
# and `padid` are getting out of sync and it is ending up that the same pad |
|
|
|
# over and over again is downloaded. This resets things in a way that it |
|
|
|
# works. This is a hack and one day TM I will find out how to fix it proper |
|
|
|
data["padID"] = padid |
|
|
|
|
|
|
|
if skip: |
|
|
|
print("[ ] {} (skipped, reason: {})".format(padid, reason)) |
|
|
|
skipped += 1 |
|
|
@ -466,33 +460,21 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
ver["message"] = html["message"] |
|
|
|
|
|
|
|
# Process text, html, dhtml, magicwords and all options |
|
|
|
downloaded_html = False |
|
|
|
if args.all or args.html: |
|
|
|
# mb: line causing the error of not writing the correct HTML content to the correct HTML file: |
|
|
|
# url = info["localapiurl"] + "getHTML?" + urlencode(data) |
|
|
|
# mb: warning, HACK! Catching the error by writing the API request url manually ... |
|
|
|
url = ( |
|
|
|
info["localapiurl"] |
|
|
|
+ "getHTML?" |
|
|
|
+ "padID=" |
|
|
|
+ padid |
|
|
|
+ "&" |
|
|
|
+ "apikey=" |
|
|
|
+ data["apikey"] |
|
|
|
) |
|
|
|
# print(url) |
|
|
|
url = info["localapiurl"] + "getHTML?" + urlencode(data) |
|
|
|
html = await agetjson(session, url) |
|
|
|
ver = {"type": "html"} |
|
|
|
versions.append(ver) |
|
|
|
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. |
|
|
|
# try: |
|
|
|
ver["code"] = html["_code"] |
|
|
|
downloaded_html = True |
|
|
|
|
|
|
|
if html["_code"] == 200: |
|
|
|
try: |
|
|
|
html = html["data"]["html"] |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".raw.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
doc = html5lib.parse( |
|
|
|
html, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
@ -504,43 +486,27 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
except TypeError: |
|
|
|
ver["message"] = html["message"] |
|
|
|
# except Exception as exception: |
|
|
|
# print("PANIC: {}".format(exception)) |
|
|
|
|
|
|
|
if args.all or args.magicwords: |
|
|
|
url = ( |
|
|
|
info["localapiurl"] |
|
|
|
+ "getHTML?" |
|
|
|
+ "padID=" |
|
|
|
+ padid |
|
|
|
+ "&" |
|
|
|
+ "apikey=" |
|
|
|
+ data["apikey"] |
|
|
|
) |
|
|
|
# print(url) |
|
|
|
if not downloaded_html: |
|
|
|
html = await agetjson(session, url) |
|
|
|
ver = {"type": "magicwords"} |
|
|
|
versions.append(ver) |
|
|
|
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. |
|
|
|
# try: |
|
|
|
ver["code"] = html["_code"] |
|
|
|
|
|
|
|
if html["_code"] == 200: |
|
|
|
try: |
|
|
|
html = html["data"]["html"] |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".magicwords.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
for magic_word in magic_words: |
|
|
|
replace_word = ( |
|
|
|
"<span class='highlight'>" + magic_word + "</span>" |
|
|
|
) |
|
|
|
if magic_word in html: |
|
|
|
html = html.replace(magic_word, replace_word) |
|
|
|
if magic_word in html_body: |
|
|
|
html_body = html_body.replace(magic_word, replace_word) |
|
|
|
doc = html5lib.parse( |
|
|
|
html, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
# INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!! |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
@ -551,8 +517,6 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
except TypeError: |
|
|
|
ver["message"] = html["message"] |
|
|
|
|
|
|
|
# output meta |
|
|
|
if args.all or args.meta: |
|
|
@ -586,7 +550,9 @@ async def handle_pads(args): |
|
|
|
start = time.time() |
|
|
|
async with trio.open_nursery() as nursery: |
|
|
|
for padid in padids: |
|
|
|
nursery.start_soon(handle_pad, args, padid, data, info, session) |
|
|
|
nursery.start_soon( |
|
|
|
handle_pad, args, padid, data.copy(), info, session |
|
|
|
) |
|
|
|
end = time.time() |
|
|
|
timeit = round(end - start, 2) |
|
|
|
|
|
|
|