|
|
@ -470,22 +470,25 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
downloaded_html = True |
|
|
|
|
|
|
|
if html["_code"] == 200: |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".raw.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
doc = html5lib.parse( |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
|
title=padid, |
|
|
|
scripts=args.script, |
|
|
|
links=links, |
|
|
|
) |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
try: |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".raw.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
doc = html5lib.parse( |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
|
title=padid, |
|
|
|
scripts=args.script, |
|
|
|
links=links, |
|
|
|
) |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
except TypeError: |
|
|
|
ver["message"] = html["message"] |
|
|
|
|
|
|
|
if args.all or args.magicwords: |
|
|
|
if not downloaded_html: |
|
|
@ -495,28 +498,31 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
ver["code"] = html["_code"] |
|
|
|
|
|
|
|
if html["_code"] == 200: |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".magicwords.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
for magic_word in magic_words: |
|
|
|
replace_word = ( |
|
|
|
"<span class='highlight'>" + magic_word + "</span>" |
|
|
|
try: |
|
|
|
html_body = html["data"]["html"] |
|
|
|
ver["path"] = p + ".magicwords.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
for magic_word in magic_words: |
|
|
|
replace_word = ( |
|
|
|
"<span class='highlight'>" + magic_word + "</span>" |
|
|
|
) |
|
|
|
if magic_word in html_body: |
|
|
|
html_body = html_body.replace(magic_word, replace_word) |
|
|
|
doc = html5lib.parse( |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
if magic_word in html_body: |
|
|
|
html_body = html_body.replace(magic_word, replace_word) |
|
|
|
doc = html5lib.parse( |
|
|
|
html_body, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
|
title=padid, |
|
|
|
scripts=args.script, |
|
|
|
links=links, |
|
|
|
) |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
|
title=padid, |
|
|
|
scripts=args.script, |
|
|
|
links=links, |
|
|
|
) |
|
|
|
async with await trio.open_file(ver["path"], "w") as f: |
|
|
|
output = ET.tostring(doc, method="html", encoding="unicode") |
|
|
|
await f.write(output) |
|
|
|
except TypeError: |
|
|
|
ver["message"] = html["message"] |
|
|
|
|
|
|
|
# output meta |
|
|
|
if args.all or args.meta: |
|
|
|