Browse Source

Updated pull to deal with errors in diffhtml output that were halting the dump process

add-quote-import
Michael Murtaugh 7 years ago
parent
commit
63910192b9
  1. 8
      etherdump/commands/index.py
  2. 24
      etherdump/commands/pull.py

8
etherdump/commands/index.py

@ -274,9 +274,11 @@ def main (args):
versions_by_type[t] = v
if "text" in versions_by_type:
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
try:
with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
except FileNotFoundError:
p['text'] = ''
# ADD IN LINK TO PAD AS "link"
for v in linkversions:
if v in versions_by_type:

24
etherdump/commands/pull.py

@ -218,15 +218,21 @@ def main (args):
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
html = html['data']['html']
ver["path"] = p+".diff.html"
ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
try:
html = html['data']['html']
ver["path"] = p+".diff.html"
ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"]
# with open(ver["path"], "w") as f:
# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
# Process text, html, dhtml, all options
if args.all or args.html:

Loading…
Cancel
Save