From e5077105b88bc9beb03e558aa50ceb101e41727e Mon Sep 17 00:00:00 2001 From: Luke Murphy Date: Thu, 10 Dec 2020 17:31:34 +0100 Subject: [PATCH] Run formatter with make --- etherpump/api/__init__.py | 2 +- etherpump/commands/pull.py | 1081 ++++++++++++++++++------------------ 2 files changed, 556 insertions(+), 527 deletions(-) diff --git a/etherpump/api/__init__.py b/etherpump/api/__init__.py index 8bcca11..262d0fe 100644 --- a/etherpump/api/__init__.py +++ b/etherpump/api/__init__.py @@ -11,7 +11,7 @@ from etherpump.commands.gethtml import main as gethtml # noqa from etherpump.commands.gettext import main as gettext # noqa from etherpump.commands.index import main as index # noqa from etherpump.commands.init import main # noqa -from etherpump.commands.init import main as init # noqa +from etherpump.commands.init import main as init from etherpump.commands.list import main as list # noqa from etherpump.commands.listauthors import main as listauthors # noqa from etherpump.commands.publication import main as publication # noqa diff --git a/etherpump/commands/pull.py b/etherpump/commands/pull.py index d1e571f..adbd32a 100644 --- a/etherpump/commands/pull.py +++ b/etherpump/commands/pull.py @@ -31,549 +31,578 @@ skipped, saved = 0, 0 async def try_deleting(files): - for f in files: - try: - path = trio.Path(f) - if os.path.exists(path): - await path.rmdir() - except Exception as exception: - print("PANIC: {}".format(exception)) + for f in files: + try: + path = trio.Path(f) + if os.path.exists(path): + await path.rmdir() + except Exception as exception: + print("PANIC: {}".format(exception)) def build_argument_parser(args): - parser = ArgumentParser( - "Check for pads that have changed since last sync (according to .meta.json)" - ) - parser.add_argument("padid", nargs="*", default=[]) - parser.add_argument( - "--glob", default=False, help="download pads matching a glob pattern" - ) - parser.add_argument( - "--padinfo", - default=".etherpump/settings.json", - help="settings, default: .etherpump/settings.json", - ) - parser.add_argument( - "--zerorevs", - default=False, - action="store_true", - help="include pads with zero revisions, default: False (i.e. 
pads with no revisions are skipped)", - ) - parser.add_argument( - "--pub", - default="p", - help="folder to store files for public pads, default: p", - ) - parser.add_argument( - "--group", - default="g", - help="folder to store files for group pads, default: g", - ) - parser.add_argument( - "--skip", - default=None, - type=int, - help="skip this many items, default: None", - ) - parser.add_argument( - "--connection", - default=200, - type=int, - help="number of connections to run concurrently", - ) - parser.add_argument( - "--meta", - default=False, - action="store_true", - help="download meta to PADID.meta.json, default: False", - ) - parser.add_argument( - "--text", - default=False, - action="store_true", - help="download text to PADID.txt, default: False", - ) - parser.add_argument( - "--html", - default=False, - action="store_true", - help="download html to PADID.html, default: False", - ) - parser.add_argument( - "--dhtml", - default=False, - action="store_true", - help="download dhtml to PADID.diff.html, default: False", - ) - parser.add_argument( - "--all", - default=False, - action="store_true", - help="download all files (meta, text, html, dhtml), default: False", - ) - parser.add_argument( - "--folder", - default=False, - action="store_true", - help="dump files in a folder named PADID (meta, text, html, dhtml), default: False", - ) - parser.add_argument( - "--output", - default=False, - action="store_true", - help="output changed padids on stdout", - ) - parser.add_argument( - "--force", - default=False, - action="store_true", - help="reload, even if revisions count matches previous", - ) - parser.add_argument( - "--no-raw-ext", - default=False, - action="store_true", - help="save plain text as padname with no (additional) extension", - ) - parser.add_argument( - "--fix-names", - default=False, - action="store_true", - help="normalize padid's (no spaces, special control chars) for use in file names", - ) - parser.add_argument( - "--filter-ext", default=None, help="filter pads by extension" - ) - parser.add_argument( - "--css", - default="/styles.css", - help="add css url to output pages, default: /styles.css", - ) - parser.add_argument( - "--script", - default="/versions.js", - help="add script url to output pages, default: /versions.js", - ) - parser.add_argument( - "--nopublish", - default="__NOPUBLISH__", - help="no publish magic word, default: __NOPUBLISH__", - ) - parser.add_argument( - "--publish", - default="__PUBLISH__", - help="the publish magic word, default: __PUBLISH__", - ) - parser.add_argument( - "--publish-opt-in", - default=False, - action="store_true", - help="ensure `--publish` is honoured instead of `--nopublish`", - ) - parser.add_argument( - "--magicwords", - default=False, - action="store_true", - help="store all magic words used in a page in the meta.json file", - ) - return parser + parser = ArgumentParser( + "Check for pads that have changed since last sync (according to .meta.json)" + ) + parser.add_argument("padid", nargs="*", default=[]) + parser.add_argument( + "--glob", default=False, help="download pads matching a glob pattern" + ) + parser.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherpump/settings.json", + ) + parser.add_argument( + "--zerorevs", + default=False, + action="store_true", + help="include pads with zero revisions, default: False (i.e. 
pads with no revisions are skipped)", + ) + parser.add_argument( + "--pub", + default="p", + help="folder to store files for public pads, default: p", + ) + parser.add_argument( + "--group", + default="g", + help="folder to store files for group pads, default: g", + ) + parser.add_argument( + "--skip", + default=None, + type=int, + help="skip this many items, default: None", + ) + parser.add_argument( + "--connection", + default=200, + type=int, + help="number of connections to run concurrently", + ) + parser.add_argument( + "--meta", + default=False, + action="store_true", + help="download meta to PADID.meta.json, default: False", + ) + parser.add_argument( + "--text", + default=False, + action="store_true", + help="download text to PADID.txt, default: False", + ) + parser.add_argument( + "--html", + default=False, + action="store_true", + help="download html to PADID.html, default: False", + ) + parser.add_argument( + "--dhtml", + default=False, + action="store_true", + help="download dhtml to PADID.diff.html, default: False", + ) + parser.add_argument( + "--all", + default=False, + action="store_true", + help="download all files (meta, text, html, dhtml), default: False", + ) + parser.add_argument( + "--folder", + default=False, + action="store_true", + help="dump files in a folder named PADID (meta, text, html, dhtml), default: False", + ) + parser.add_argument( + "--output", + default=False, + action="store_true", + help="output changed padids on stdout", + ) + parser.add_argument( + "--force", + default=False, + action="store_true", + help="reload, even if revisions count matches previous", + ) + parser.add_argument( + "--no-raw-ext", + default=False, + action="store_true", + help="save plain text as padname with no (additional) extension", + ) + parser.add_argument( + "--fix-names", + default=False, + action="store_true", + help="normalize padid's (no spaces, special control chars) for use in file names", + ) + parser.add_argument( + "--filter-ext", default=None, help="filter pads by extension" + ) + parser.add_argument( + "--css", + default="/styles.css", + help="add css url to output pages, default: /styles.css", + ) + parser.add_argument( + "--script", + default="/versions.js", + help="add script url to output pages, default: /versions.js", + ) + parser.add_argument( + "--nopublish", + default="__NOPUBLISH__", + help="no publish magic word, default: __NOPUBLISH__", + ) + parser.add_argument( + "--publish", + default="__PUBLISH__", + help="the publish magic word, default: __PUBLISH__", + ) + parser.add_argument( + "--publish-opt-in", + default=False, + action="store_true", + help="ensure `--publish` is honoured instead of `--nopublish`", + ) + parser.add_argument( + "--magicwords", + default=False, + action="store_true", + help="store all magic words used in a page in the meta.json file", + ) + return parser async def get_padids(args, info, data, session): - if args.padid: - padids = args.padid - elif args.glob: - url = info["localapiurl"] + "listAllPads?" + urlencode(data) - padids = await agetjson(session, url) - padids = padids["data"]["padIDs"] - padids = [x for x in padids if fnmatch(x, args.glob)] - else: - url = info["localapiurl"] + "listAllPads?" + urlencode(data) - padids = await agetjson(session, url) - padids = padids["data"]["padIDs"] - - padids.sort() - return padids + if args.padid: + padids = args.padid + elif args.glob: + url = info["localapiurl"] + "listAllPads?" 
+ urlencode(data) + padids = await agetjson(session, url) + padids = padids["data"]["padIDs"] + padids = [x for x in padids if fnmatch(x, args.glob)] + else: + url = info["localapiurl"] + "listAllPads?" + urlencode(data) + padids = await agetjson(session, url) + padids = padids["data"]["padIDs"] + + padids.sort() + return padids async def handle_pad(args, padid, data, info, session): - global skipped, saved - - raw_ext = ".raw.txt" - if args.no_raw_ext: - raw_ext = "" - - data["padID"] = padid - p = padpath(padid, args.pub, args.group, args.fix_names) - if args.folder: - p = os.path.join(p, padid) - - metapath = p + ".meta.json" - revisions = None - tries = 1 - skip = False - padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) - meta = {} - - while True: - try: - if os.path.exists(metapath): - async with await trio.open_file(metapath) as f: - contents = await f.read() - meta.update(json.loads(contents)) - url = ( - info["localapiurl"] + "getRevisionsCount?" + urlencode(data) - ) - response = await agetjson(session, url) - revisions = response["data"]["revisions"] - if meta["revisions"] == revisions and not args.force: - skip = True - reason = "No new revisions, we already have the latest local copy" - break - - meta["padid"] = padid - versions = meta["versions"] = [] - versions.append( - {"url": padurlbase + quote(padid), "type": "pad", "code": 200,} - ) - - if revisions is None: - url = ( - info["localapiurl"] + "getRevisionsCount?" + urlencode(data) - ) - response = await agetjson(session, url) - meta["revisions"] = response["data"]["revisions"] - else: - meta["revisions"] = revisions - - if (meta["revisions"] == 0) and (not args.zerorevs): - skip = True - reason = "0 revisions, this pad was never edited" - break - - # todo: load more metadata! - meta["group"], meta["pad"] = splitpadname(padid) - meta["pathbase"] = p - - url = info["localapiurl"] + "getLastEdited?" + urlencode(data) - response = await agetjson(session, url) - meta["lastedited_raw"] = int(response["data"]["lastEdited"]) - - meta["lastedited_iso"] = datetime.fromtimestamp( - int(meta["lastedited_raw"]) / 1000 - ).isoformat() - - url = info["localapiurl"] + "listAuthorsOfPad?" + urlencode(data) - response = await agetjson(session, url) - meta["author_ids"] = response["data"]["authorIDs"] - - break - except HTTPError as e: - tries += 1 - if tries > 3: - print( - "Too many failures ({0}), skipping".format(padid), - file=sys.stderr, - ) - skip = True - reason = "PANIC, couldn't download the pad contents" - break - else: - await trio.sleep(1) - except TypeError as e: - print( - "Type Error loading pad {0} (phantom pad?), skipping".format( - padid - ), - file=sys.stderr, - ) - skip = True - reason = "PANIC, couldn't download the pad contents" - break - - # Note(decentral1se): cannot track this bug down but basically the `data` - # and `padid` are getting out of sync and it is ending up that the same pad - # over and over again is downloaded. This resets things in a way that it - # works. 
This is a hack and one day TM I will find out how to fix it proper - data["padID"] = padid - - if skip: - print("[ ] {} (skipped, reason: {})".format(padid, reason)) - skipped += 1 - return - - if args.output: - print(padid) - - if args.all or (args.meta or args.text or args.html or args.dhtml): - try: - path = trio.Path(os.path.split(metapath)[0]) - if not os.path.exists(path): - await path.mkdir() - except OSError: - # Note(decentral1se): the path already exists - pass - - if args.all or args.text: - url = info["localapiurl"] + "getText?" + urlencode(data) - text = await agetjson(session, url) - ver = {"type": "text"} - versions.append(ver) - ver["code"] = text["_code"] - - if text["_code"] == 200: - text = text["data"]["text"] - - ########################################## - ## ENFORCE __NOPUBLISH__ MAGIC WORD - ########################################## - if args.nopublish in text: - await try_deleting( - ( - p + raw_ext, - p + ".raw.html", - p + ".diff.html", - p + ".meta.json", - ) - ) - print( - "[ ] {} (deleted, reason: explicit __NOPUBLISH__)".format( - padid - ) - ) - skipped += 1 - return False - - ########################################## - ## ENFORCE __PUBLISH__ MAGIC WORD - ########################################## - if args.publish_opt_in and args.publish not in text: - await try_deleting( - ( - p + raw_ext, - p + ".raw.html", - p + ".diff.html", - p + ".meta.json", - ) - ) - print("[ ] {} (deleted, reason: publish opt-out)".format(padid)) - skipped += 1 - return False - - ver["path"] = p + raw_ext - ver["url"] = quote(ver["path"]) - async with await trio.open_file(ver["path"], "w") as f: - try: - # Note(decentral1se): unicode handling... - safe_text = text.encode("utf-8", "replace").decode() - await f.write(safe_text) - except Exception as exception: - print("PANIC: {}".format(exception)) - - # once the content is settled, compute a hash - # and link it in the metadata! - - - # include magic words - if args.magicwords: - - ########################################## - ## INCLUDE __XXX__ MAGIC WORDS - ########################################## - pattern = r'__[a-zA-Z0-9]+?__' - magic_words = re.findall(pattern, text) - magic_words = list(set(magic_words)) - if magic_words: - meta["magicwords"] = magic_words - print('FOUND MAGIC WORD(s): {} in {}'.format(magic_words, padid)) - - links = [] - if args.css: - links.append({"href": args.css, "rel": "stylesheet"}) - # todo, make this process reflect which files actually were made - versionbaseurl = quote(padid) - links.append( - { - "href": versions[0]["url"], - "rel": "alternate", - "type": "text/html", - "title": "Etherpad", - } - ) - if args.all or args.text: - links.append( - { - "href": versionbaseurl + raw_ext, - "rel": "alternate", - "type": "text/plain", - "title": "Plain text", - } - ) - if args.all or args.html: - links.append( - { - "href": versionbaseurl + ".raw.html", - "rel": "alternate", - "type": "text/html", - "title": "HTML", - } - ) - if args.all or args.dhtml: - links.append( - { - "href": versionbaseurl + ".diff.html", - "rel": "alternate", - "type": "text/html", - "title": "HTML with author colors", - } - ) - if args.all or args.meta: - links.append( - { - "href": versionbaseurl + ".meta.json", - "rel": "alternate", - "type": "application/json", - "title": "Meta data", - } - ) - - if args.all or args.dhtml: - data["startRev"] = "0" - url = info["localapiurl"] + "createDiffHTML?" 
+ urlencode(data) - html = await agetjson(session, url) - ver = {"type": "diffhtml"} - versions.append(ver) - ver["code"] = html["_code"] - if html["_code"] == 200: - try: - html = html["data"]["html"] - ver["path"] = p + ".diff.html" - ver["url"] = quote(ver["path"]) - doc = html5lib.parse( - html, treebuilder="etree", namespaceHTMLElements=False - ) - html5tidy( - doc, - indent=True, - title=padid, - scripts=args.script, - links=links, - ) - async with await trio.open_file(ver["path"], "w") as f: - output = ET.tostring(doc, method="html", encoding="unicode") - await f.write(output) - except TypeError: - ver["message"] = html["message"] - - # Process text, html, dhtml, all options - if args.all or args.html: - # mb: line causing the error of not writing the correct HTML content to the correct HTML file: - # url = info["localapiurl"] + "getHTML?" + urlencode(data) - # mb: warning, HACK! Catching the error by writing the API request url manually ... - url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"] - # print(url) - html = await agetjson(session, url) - ver = {"type": "html"} - versions.append(ver) - # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. - # try: - ver["code"] = html["_code"] - if html["_code"] == 200: - try: - html = html["data"]["html"] - ver["path"] = p + ".raw.html" - ver["url"] = quote(ver["path"]) - doc = html5lib.parse( - html, treebuilder="etree", namespaceHTMLElements=False - ) - html5tidy( - doc, indent=True, title=padid, scripts=args.script, links=links, - ) - async with await trio.open_file(ver["path"], "w") as f: - output = ET.tostring(doc, method="html", encoding="unicode") - await f.write(output) - except TypeError: - ver["message"] = html["message"] - # except Exception as exception: - # print("PANIC: {}".format(exception)) - - if args.all or args.magicwords: - url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"] - # print(url) - html = await agetjson(session, url) - ver = {"type": "magicwords"} - versions.append(ver) - # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. - # try: - ver["code"] = html["_code"] - if html["_code"] == 200: - try: - html = html["data"]["html"] - ver["path"] = p + ".magicwords.html" - ver["url"] = quote(ver["path"]) - for magic_word in magic_words: - replace_word = ""+magic_word+"" - if magic_word in html: - html = html.replace(magic_word,replace_word) - doc = html5lib.parse( - html, treebuilder="etree", namespaceHTMLElements=False - ) - # INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!! 
- html5tidy( - doc, indent=True, title=padid, scripts=args.script, links=links, - ) - async with await trio.open_file(ver["path"], "w") as f: - output = ET.tostring(doc, method="html", encoding="unicode") - await f.write(output) - except TypeError: - ver["message"] = html["message"] - - # output meta - if args.all or args.meta: - ver = {"type": "meta"} - versions.append(ver) - ver["path"] = metapath - ver["url"] = quote(metapath) - async with await trio.open_file(metapath, "w") as f: - await f.write(json.dumps(meta)) - - print("[x] {} (saved)".format(padid)) - saved += 1 - return + global skipped, saved + + raw_ext = ".raw.txt" + if args.no_raw_ext: + raw_ext = "" + + data["padID"] = padid + p = padpath(padid, args.pub, args.group, args.fix_names) + if args.folder: + p = os.path.join(p, padid) + + metapath = p + ".meta.json" + revisions = None + tries = 1 + skip = False + padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) + meta = {} + + while True: + try: + if os.path.exists(metapath): + async with await trio.open_file(metapath) as f: + contents = await f.read() + meta.update(json.loads(contents)) + url = ( + info["localapiurl"] + "getRevisionsCount?" + urlencode(data) + ) + response = await agetjson(session, url) + revisions = response["data"]["revisions"] + if meta["revisions"] == revisions and not args.force: + skip = True + reason = "No new revisions, we already have the latest local copy" + break + + meta["padid"] = padid + versions = meta["versions"] = [] + versions.append( + {"url": padurlbase + quote(padid), "type": "pad", "code": 200,} + ) + + if revisions is None: + url = ( + info["localapiurl"] + "getRevisionsCount?" + urlencode(data) + ) + response = await agetjson(session, url) + meta["revisions"] = response["data"]["revisions"] + else: + meta["revisions"] = revisions + + if (meta["revisions"] == 0) and (not args.zerorevs): + skip = True + reason = "0 revisions, this pad was never edited" + break + + # todo: load more metadata! + meta["group"], meta["pad"] = splitpadname(padid) + meta["pathbase"] = p + + url = info["localapiurl"] + "getLastEdited?" + urlencode(data) + response = await agetjson(session, url) + meta["lastedited_raw"] = int(response["data"]["lastEdited"]) + + meta["lastedited_iso"] = datetime.fromtimestamp( + int(meta["lastedited_raw"]) / 1000 + ).isoformat() + + url = info["localapiurl"] + "listAuthorsOfPad?" + urlencode(data) + response = await agetjson(session, url) + meta["author_ids"] = response["data"]["authorIDs"] + + break + except HTTPError as e: + tries += 1 + if tries > 3: + print( + "Too many failures ({0}), skipping".format(padid), + file=sys.stderr, + ) + skip = True + reason = "PANIC, couldn't download the pad contents" + break + else: + await trio.sleep(1) + except TypeError as e: + print( + "Type Error loading pad {0} (phantom pad?), skipping".format( + padid + ), + file=sys.stderr, + ) + skip = True + reason = "PANIC, couldn't download the pad contents" + break + + # Note(decentral1se): cannot track this bug down but basically the `data` + # and `padid` are getting out of sync and it is ending up that the same pad + # over and over again is downloaded. This resets things in a way that it + # works. 
This is a hack and one day TM I will find out how to fix it proper + data["padID"] = padid + + if skip: + print("[ ] {} (skipped, reason: {})".format(padid, reason)) + skipped += 1 + return + + if args.output: + print(padid) + + if args.all or (args.meta or args.text or args.html or args.dhtml): + try: + path = trio.Path(os.path.split(metapath)[0]) + if not os.path.exists(path): + await path.mkdir() + except OSError: + # Note(decentral1se): the path already exists + pass + + if args.all or args.text: + url = info["localapiurl"] + "getText?" + urlencode(data) + text = await agetjson(session, url) + ver = {"type": "text"} + versions.append(ver) + ver["code"] = text["_code"] + + if text["_code"] == 200: + text = text["data"]["text"] + + ########################################## + ## ENFORCE __NOPUBLISH__ MAGIC WORD + ########################################## + if args.nopublish in text: + await try_deleting( + ( + p + raw_ext, + p + ".raw.html", + p + ".diff.html", + p + ".meta.json", + ) + ) + print( + "[ ] {} (deleted, reason: explicit __NOPUBLISH__)".format( + padid + ) + ) + skipped += 1 + return False + + ########################################## + ## ENFORCE __PUBLISH__ MAGIC WORD + ########################################## + if args.publish_opt_in and args.publish not in text: + await try_deleting( + ( + p + raw_ext, + p + ".raw.html", + p + ".diff.html", + p + ".meta.json", + ) + ) + print("[ ] {} (deleted, reason: publish opt-out)".format(padid)) + skipped += 1 + return False + + ver["path"] = p + raw_ext + ver["url"] = quote(ver["path"]) + async with await trio.open_file(ver["path"], "w") as f: + try: + # Note(decentral1se): unicode handling... + safe_text = text.encode("utf-8", "replace").decode() + await f.write(safe_text) + except Exception as exception: + print("PANIC: {}".format(exception)) + + # once the content is settled, compute a hash + # and link it in the metadata! + + # include magic words + if args.magicwords: + + ########################################## + ## INCLUDE __XXX__ MAGIC WORDS + ########################################## + pattern = r"__[a-zA-Z0-9]+?__" + magic_words = re.findall(pattern, text) + magic_words = list(set(magic_words)) + if magic_words: + meta["magicwords"] = magic_words + print( + "FOUND MAGIC WORD(s): {} in {}".format( + magic_words, padid + ) + ) + + links = [] + if args.css: + links.append({"href": args.css, "rel": "stylesheet"}) + # todo, make this process reflect which files actually were made + versionbaseurl = quote(padid) + links.append( + { + "href": versions[0]["url"], + "rel": "alternate", + "type": "text/html", + "title": "Etherpad", + } + ) + if args.all or args.text: + links.append( + { + "href": versionbaseurl + raw_ext, + "rel": "alternate", + "type": "text/plain", + "title": "Plain text", + } + ) + if args.all or args.html: + links.append( + { + "href": versionbaseurl + ".raw.html", + "rel": "alternate", + "type": "text/html", + "title": "HTML", + } + ) + if args.all or args.dhtml: + links.append( + { + "href": versionbaseurl + ".diff.html", + "rel": "alternate", + "type": "text/html", + "title": "HTML with author colors", + } + ) + if args.all or args.meta: + links.append( + { + "href": versionbaseurl + ".meta.json", + "rel": "alternate", + "type": "application/json", + "title": "Meta data", + } + ) + + if args.all or args.dhtml: + data["startRev"] = "0" + url = info["localapiurl"] + "createDiffHTML?" 
+ urlencode(data) + html = await agetjson(session, url) + ver = {"type": "diffhtml"} + versions.append(ver) + ver["code"] = html["_code"] + if html["_code"] == 200: + try: + html = html["data"]["html"] + ver["path"] = p + ".diff.html" + ver["url"] = quote(ver["path"]) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + html5tidy( + doc, + indent=True, + title=padid, + scripts=args.script, + links=links, + ) + async with await trio.open_file(ver["path"], "w") as f: + output = ET.tostring(doc, method="html", encoding="unicode") + await f.write(output) + except TypeError: + ver["message"] = html["message"] + + # Process text, html, dhtml, all options + if args.all or args.html: + # mb: line causing the error of not writing the correct HTML content to the correct HTML file: + # url = info["localapiurl"] + "getHTML?" + urlencode(data) + # mb: warning, HACK! Catching the error by writing the API request url manually ... + url = ( + info["localapiurl"] + + "getHTML?" + + "padID=" + + padid + + "&" + + "apikey=" + + data["apikey"] + ) + # print(url) + html = await agetjson(session, url) + ver = {"type": "html"} + versions.append(ver) + # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. + # try: + ver["code"] = html["_code"] + if html["_code"] == 200: + try: + html = html["data"]["html"] + ver["path"] = p + ".raw.html" + ver["url"] = quote(ver["path"]) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + html5tidy( + doc, + indent=True, + title=padid, + scripts=args.script, + links=links, + ) + async with await trio.open_file(ver["path"], "w") as f: + output = ET.tostring(doc, method="html", encoding="unicode") + await f.write(output) + except TypeError: + ver["message"] = html["message"] + # except Exception as exception: + # print("PANIC: {}".format(exception)) + + if args.all or args.magicwords: + url = ( + info["localapiurl"] + + "getHTML?" + + "padID=" + + padid + + "&" + + "apikey=" + + data["apikey"] + ) + # print(url) + html = await agetjson(session, url) + ver = {"type": "magicwords"} + versions.append(ver) + # mb: warning, HACK! Added a Try and Except here, as it sometimes bumped into an error, stopping the pull. + # try: + ver["code"] = html["_code"] + if html["_code"] == 200: + try: + html = html["data"]["html"] + ver["path"] = p + ".magicwords.html" + ver["url"] = quote(ver["path"]) + for magic_word in magic_words: + replace_word = ( + "" + magic_word + "" + ) + if magic_word in html: + html = html.replace(magic_word, replace_word) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + # INSERT MAGIC WORDS HIGHLIGHTING STUFF HERE!!! 
+ html5tidy( + doc, + indent=True, + title=padid, + scripts=args.script, + links=links, + ) + async with await trio.open_file(ver["path"], "w") as f: + output = ET.tostring(doc, method="html", encoding="unicode") + await f.write(output) + except TypeError: + ver["message"] = html["message"] + + # output meta + if args.all or args.meta: + ver = {"type": "meta"} + versions.append(ver) + ver["path"] = metapath + ver["url"] = quote(metapath) + async with await trio.open_file(metapath, "w") as f: + await f.write(json.dumps(meta)) + + print("[x] {} (saved)".format(padid)) + saved += 1 + return async def handle_pads(args): - global skipped, saved + global skipped, saved - session = asks.Session(connections=args.connection) - info = loadpadinfo(args.padinfo) - data = {"apikey": info["apikey"]} + session = asks.Session(connections=args.connection) + info = loadpadinfo(args.padinfo) + data = {"apikey": info["apikey"]} - padids = await get_padids(args, info, data, session) - if args.skip: - padids = padids[args.skip : len(padids)] + padids = await get_padids(args, info, data, session) + if args.skip: + padids = padids[args.skip : len(padids)] - print("=" * 79) - print("Etherpump is warming up the engines ...") - print("=" * 79) + print("=" * 79) + print("Etherpump is warming up the engines ...") + print("=" * 79) - start = time.time() - async with trio.open_nursery() as nursery: - for padid in padids: - nursery.start_soon(handle_pad, args, padid, data, info, session) - end = time.time() - timeit = round(end - start, 2) + start = time.time() + async with trio.open_nursery() as nursery: + for padid in padids: + nursery.start_soon(handle_pad, args, padid, data, info, session) + end = time.time() + timeit = round(end - start, 2) - print("=" * 79) - print( - "Processed {} :: Skipped {} :: Saved {} :: Time {}s".format( - len(padids), skipped, saved, timeit - ) - ) - print("=" * 79) + print("=" * 79) + print( + "Processed {} :: Skipped {} :: Saved {} :: Time {}s".format( + len(padids), skipped, saved, timeit + ) + ) + print("=" * 79) def main(args): - p = build_argument_parser(args) - args = p.parse_args(args) - trio.run(handle_pads, args) + p = build_argument_parser(args) + args = p.parse_args(args) + trio.run(handle_pads, args)
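
Below is a minimal, self-contained sketch of the kind of rewrap this commit applies, using the manually built getHTML URL from pull.py as the example. The values are dummies, and it is an assumption (not stated in the commit) that the `make` target drives a Black-style formatter; the patch only records the result of the run.

    # A sketch of the rewrap shown in the hunks above. Dummy values; assuming a
    # Black-style formatter (double quotes, parenthesised wrapping, one operand
    # per line) -- the commit message only says "Run formatter with make".
    info = {"localapiurl": "http://localhost:9001/api/1.2.9/"}
    data = {"apikey": "secret"}
    padid = "example-pad"

    # Pre-format style: one long concatenation with mixed quote characters.
    url = info["localapiurl"] + "getHTML?" + "padID=" + padid + "&" + 'apikey=' + data["apikey"]

    # Post-format style: wrapped in parentheses, one operand per line,
    # double quotes throughout. Behaviour is unchanged.
    url = (
        info["localapiurl"]
        + "getHTML?"
        + "padID="
        + padid
        + "&"
        + "apikey="
        + data["apikey"]
    )

    assert url == "http://localhost:9001/api/1.2.9/getHTML?padID=example-pad&apikey=secret"

The same pattern accounts for most of the 556 insertions / 527 deletions in pull.py: long call expressions and string concatenations are split one element per line with a trailing comma, while the logic of each hunk is left as it was.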