Browse Source

Updated pull to deal with errors in diffhtml output that were halting the dump process

add-quote-import
Michael Murtaugh 6 years ago
parent
commit
63910192b9
  1. 8
      etherdump/commands/index.py
  2. 24
      etherdump/commands/pull.py

8
etherdump/commands/index.py

@@ -274,9 +274,11 @@ def main (args):
versions_by_type[t] = v versions_by_type[t] = v
if "text" in versions_by_type: if "text" in versions_by_type:
with open (versions_by_type["text"]["path"]) as f: try:
p["text"] = f.read() with open (versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
except FileNotFoundError:
p['text'] = ''
# ADD IN LINK TO PAD AS "link" # ADD IN LINK TO PAD AS "link"
for v in linkversions: for v in linkversions:
if v in versions_by_type: if v in versions_by_type:

24
etherdump/commands/pull.py

@@ -218,15 +218,21 @@ def main (args):
versions.append(ver) versions.append(ver)
ver["code"] = html["_code"] ver["code"] = html["_code"]
if html["_code"] == 200: if html["_code"] == 200:
html = html['data']['html'] try:
ver["path"] = p+".diff.html" html = html['data']['html']
ver["url"] = quote(ver["path"]) ver["path"] = p+".diff.html"
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False) ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
with open(ver["path"], "w") as f: html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
# f.write(html.encode("utf-8")) with open(ver["path"], "w") as f:
print(ET.tostring(doc, method="html", encoding="unicode"), file=f) # f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"]
# with open(ver["path"], "w") as f:
# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
# Process text, html, dhtml, all options # Process text, html, dhtml, all options
if args.all or args.html: if args.all or args.html:

Loading…
Cancel
Save