work in progress around the magic words
This commit is contained in:
parent
8fd2abf8f4
commit
923cc11beb
@ -173,6 +173,7 @@ def build_argument_parser(args):
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--magic-words",
|
"--magic-words",
|
||||||
default=False,
|
default=False,
|
||||||
|
action="store_true",
|
||||||
help="store all magic words used in a page in the meta.json file",
|
help="store all magic words used in a page in the meta.json file",
|
||||||
)
|
)
|
||||||
return parser
|
return parser
|
||||||
@ -379,32 +380,10 @@ async def handle_pad(args, padid, data, info, session):
|
|||||||
##########################################
|
##########################################
|
||||||
## INCLUDE __XXX__ MAGIC WORDS
|
## INCLUDE __XXX__ MAGIC WORDS
|
||||||
##########################################
|
##########################################
|
||||||
pattern = r'[__\w+?__]'
|
pattern = r'__[a-zA-Z0-9]+?__'
|
||||||
magic_words = re.match(pattern, string)
|
magic_words = re.findall(pattern, text)
|
||||||
magic_words = magic_words.groups()
|
if magic_words:
|
||||||
print(magic_words)
|
print('FOUND MAGIC WORD(s): {} in {}'.format(magic_words, padid))
|
||||||
if args.publish_opt_in and args.publish not in text:
|
|
||||||
await try_deleting(
|
|
||||||
(
|
|
||||||
p + raw_ext,
|
|
||||||
p + ".raw.html",
|
|
||||||
p + ".diff.html",
|
|
||||||
p + ".meta.json",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
print("[ ] {} (deleted, reason: publish opt-out)".format(padid))
|
|
||||||
skipped += 1
|
|
||||||
return False
|
|
||||||
|
|
||||||
ver["path"] = p + raw_ext
|
|
||||||
ver["url"] = quote(ver["path"])
|
|
||||||
async with await trio.open_file(ver["path"], "w") as f:
|
|
||||||
try:
|
|
||||||
# Note(decentral1se): unicode handling...
|
|
||||||
safe_text = text.encode("utf-8", "replace").decode()
|
|
||||||
await f.write(safe_text)
|
|
||||||
except Exception as exception:
|
|
||||||
print("PANIC: {}".format(exception))
|
|
||||||
|
|
||||||
links = []
|
links = []
|
||||||
if args.css:
|
if args.css:
|
||||||
@ -489,32 +468,32 @@ async def handle_pad(args, padid, data, info, session):
|
|||||||
# mb: line causing the error of not writing the correct HTML content to the correct HTML file:
|
# mb: line causing the error of not writing the correct HTML content to the correct HTML file:
|
||||||
# url = info["localapiurl"] + "getHTML?" + urlencode(data)
|
# url = info["localapiurl"] + "getHTML?" + urlencode(data)
|
||||||
# mb: warning, HACK! Catching the error by writing the API request url manually ...
|
# mb: warning, HACK! Catching the error by writing the API request url manually ...
|
||||||
url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"] + '&startRev=0'
|
url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"]
|
||||||
# print(url)
|
# print(url)
|
||||||
html = await agetjson(session, url)
|
html = await agetjson(session, url)
|
||||||
ver = {"type": "html"}
|
ver = {"type": "html"}
|
||||||
versions.append(ver)
|
versions.append(ver)
|
||||||
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
|
# mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull.
|
||||||
try:
|
# try:
|
||||||
ver["code"] = html["_code"]
|
ver["code"] = html["_code"]
|
||||||
if html["_code"] == 200:
|
if html["_code"] == 200:
|
||||||
try:
|
try:
|
||||||
html = html["data"]["html"]
|
html = html["data"]["html"]
|
||||||
ver["path"] = p + ".raw.html"
|
ver["path"] = p + ".raw.html"
|
||||||
ver["url"] = quote(ver["path"])
|
ver["url"] = quote(ver["path"])
|
||||||
doc = html5lib.parse(
|
doc = html5lib.parse(
|
||||||
html, treebuilder="etree", namespaceHTMLElements=False
|
html, treebuilder="etree", namespaceHTMLElements=False
|
||||||
)
|
)
|
||||||
html5tidy(
|
html5tidy(
|
||||||
doc, indent=True, title=padid, scripts=args.script, links=links,
|
doc, indent=True, title=padid, scripts=args.script, links=links,
|
||||||
)
|
)
|
||||||
async with await trio.open_file(ver["path"], "w") as f:
|
async with await trio.open_file(ver["path"], "w") as f:
|
||||||
output = ET.tostring(doc, method="html", encoding="unicode")
|
output = ET.tostring(doc, method="html", encoding="unicode")
|
||||||
await f.write(output)
|
await f.write(output)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
ver["message"] = html["message"]
|
ver["message"] = html["message"]
|
||||||
except Exception as exception:
|
# except Exception as exception:
|
||||||
print("PANIC: {}".format(exception))
|
# print("PANIC: {}".format(exception))
|
||||||
|
|
||||||
# output meta
|
# output meta
|
||||||
if args.all or args.meta:
|
if args.all or args.meta:
|
||||||
|
Loading…
Reference in New Issue
Block a user