work in progress around the magic words
commit 923cc11beb
parent 8fd2abf8f4
@@ -173,6 +173,7 @@ def build_argument_parser(args):
    parser.add_argument(
        "--magic-words",
        default=False,
        action="store_true",
        help="store all magic words used in a page in the meta.json file",
    )
    return parser
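For context, a minimal standalone sketch of how argparse exposes the new flag (the dashes in "--magic-words" become an underscore in the attribute name); the downstream check of args.magic_words is an assumption and not part of this hunk:

import argparse

# Hypothetical, self-contained reproduction of the option added above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--magic-words",
    default=False,
    action="store_true",
    help="store all magic words used in a page in the meta.json file",
)
args = parser.parse_args(["--magic-words"])
assert args.magic_words is True  # "--magic-words" is read back as args.magic_words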
@@ -379,32 +380,10 @@ async def handle_pad(args, padid, data, info, session):
    ##########################################
    ## INCLUDE __XXX__ MAGIC WORDS
    ##########################################
    pattern = r'[__\w+?__]'
    magic_words = re.match(pattern, string)
    magic_words = magic_words.groups()
    print(magic_words)
    if args.publish_opt_in and args.publish not in text:
        await try_deleting(
            (
                p + raw_ext,
                p + ".raw.html",
                p + ".diff.html",
                p + ".meta.json",
            )
        )
        print("[ ] {} (deleted, reason: publish opt-out)".format(padid))
        skipped += 1
        return False

    ver["path"] = p + raw_ext
    ver["url"] = quote(ver["path"])
    async with await trio.open_file(ver["path"], "w") as f:
        try:
            # Note(decentral1se): unicode handling...
            safe_text = text.encode("utf-8", "replace").decode()
            await f.write(safe_text)
        except Exception as exception:
            print("PANIC: {}".format(exception))
    pattern = r'__[a-zA-Z0-9]+?__'
    magic_words = re.findall(pattern, text)
    if magic_words:
        print('FOUND MAGIC WORD(s): {} in {}'.format(magic_words, padid))

    links = []
    if args.css:
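A side note on the two regex attempts in this hunk: re.match() only tests the beginning of the string, and r'[__\w+?__]' is a character class matching a single character with no capture groups, so .groups() on its result cannot return the magic words; re.findall() with the second pattern returns every non-overlapping match in the whole text. A minimal sketch (the sample text is made up for illustration):

import re

text = "notes for the workshop __PUBLISH__ and a draft __NOPUBLISH__ section"
print(re.findall(r'__[a-zA-Z0-9]+?__', text))
# ['__PUBLISH__', '__NOPUBLISH__']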
@@ -489,32 +468,32 @@ async def handle_pad(args, padid, data, info, session):
        # mb: line causing the error of not writing the correct HTML content to the correct HTML file:
        # url = info["localapiurl"] + "getHTML?" + urlencode(data)
        # mb: warning, HACK! Catching the error by writing the API request url manually ...
        url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"] + '&startRev=0'
        url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"]
        # print(url)
        html = await agetjson(session, url)
        ver = {"type": "html"}
        versions.append(ver)
        # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
        try:
            ver["code"] = html["_code"]
            if html["_code"] == 200:
                try:
                    html = html["data"]["html"]
                    ver["path"] = p + ".raw.html"
                    ver["url"] = quote(ver["path"])
                    doc = html5lib.parse(
                        html, treebuilder="etree", namespaceHTMLElements=False
                    )
                    html5tidy(
                        doc, indent=True, title=padid, scripts=args.script, links=links,
                    )
                    async with await trio.open_file(ver["path"], "w") as f:
                        output = ET.tostring(doc, method="html", encoding="unicode")
                        await f.write(output)
                except TypeError:
                    ver["message"] = html["message"]
        except Exception as exception:
            print("PANIC: {}".format(exception))
        # try:
        ver["code"] = html["_code"]
        if html["_code"] == 200:
            try:
                html = html["data"]["html"]
                ver["path"] = p + ".raw.html"
                ver["url"] = quote(ver["path"])
                doc = html5lib.parse(
                    html, treebuilder="etree", namespaceHTMLElements=False
                )
                html5tidy(
                    doc, indent=True, title=padid, scripts=args.script, links=links,
                )
                async with await trio.open_file(ver["path"], "w") as f:
                    output = ET.tostring(doc, method="html", encoding="unicode")
                    await f.write(output)
            except TypeError:
                ver["message"] = html["message"]
        # except Exception as exception:
        # print("PANIC: {}".format(exception))

    # output meta
    if args.all or args.meta:
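On the URL construction in the last hunk: urlencode() percent-encodes the query parameters, while the manual concatenation sends padid and the apikey unescaped (pad names containing spaces, '&' or non-ASCII characters end up raw in the request). Whatever the underlying bug was, that is the encoding difference between the two lines. A small sketch, assuming data holds the API key and using a placeholder for info["localapiurl"]:

from urllib.parse import urlencode

apiurl = "http://localhost:9001/api/1.2.15/"  # placeholder base URL, for illustration only
data = {"padID": "my pad & notes", "apikey": "secret"}
url = apiurl + "getHTML?" + urlencode(data)
print(url)
# http://localhost:9001/api/1.2.15/getHTML?padID=my+pad+%26+notes&apikey=secret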