From b0f4a7e460833ab2aa7396e78e9854956b4e2f8f Mon Sep 17 00:00:00 2001 From: manetta Date: Fri, 4 Dec 2020 15:49:25 +0100 Subject: [PATCH] next driver --- cron.sh | 4 ++-- etherpump/commands/pull.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/cron.sh b/cron.sh index eb48aae..1b1d994 100644 --- a/cron.sh +++ b/cron.sh @@ -1,10 +1,10 @@ # This will pump all the pads with a __PUBLISH__ tag into a folder "publish" as meta.json, txt, html and dhtml -poetry run etherpump pull --meta --html --text --publish-opt-in --publish __PUBLISH__ --pub publish --css ../stylesheet.css --fix-names +etherpump pull --meta --html --text --publish-opt-in --publish __PUBLISH__ --pub publish --css ../stylesheet.css --fix-names echo "Making the Etherpump index now ..." # This will make an index for the dump -poetry run etherpump index input \ +etherpump index input \ publish/*.meta.json \ --templatepath templates \ --title "Notes, __MAGICWORDS__, readers & more ..." \ diff --git a/etherpump/commands/pull.py b/etherpump/commands/pull.py index 98fce0e..e5b7fec 100644 --- a/etherpump/commands/pull.py +++ b/etherpump/commands/pull.py @@ -171,7 +171,7 @@ def build_argument_parser(args): help="ensure `--publish` is honoured instead of `--nopublish`", ) parser.add_argument( - "--magic-words", + "--magicwords", default=False, action="store_true", help="store all magic words used in a page in the meta.json file", @@ -375,7 +375,7 @@ async def handle_pad(args, padid, data, info, session): # include magic words - if args.magic_words: + if args.magicwords: ########################################## ## INCLUDE __XXX__ MAGIC WORDS @@ -383,6 +383,7 @@ async def handle_pad(args, padid, data, info, session): pattern = r'__[a-zA-Z0-9]+?__' magic_words = re.findall(pattern, text) if magic_words: + meta["magicwords"] = magic_words print('FOUND MAGIC WORD(s): {} in {}'.format(magic_words, padid)) links = [] @@ -495,6 +496,33 @@ async def handle_pad(args, padid, data, info, session): # except Exception as exception: # print("PANIC: {}".format(exception)) + if args.all or args.magicwords: + url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"] + # print(url) + html = await agetjson(session, url) + ver = {"type": "magicwords"} + versions.append(ver) + # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. + # try: + ver["code"] = html["_code"] + if html["_code"] == 200: + try: + html = html["data"]["html"] + ver["path"] = p + ".magicwords.html" + ver["url"] = quote(ver["path"]) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + # INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!! + html5tidy( + doc, indent=True, title=padid, scripts=args.script, links=links, + ) + async with await trio.open_file(ver["path"], "w") as f: + output = ET.tostring(doc, method="html", encoding="unicode") + await f.write(output) + except TypeError: + ver["message"] = html["message"] + # output meta if args.all or args.meta: ver = {"type": "meta"}