diff --git a/etherdump/commands/common.py b/etherdump/commands/common.py
index 7e08b8f..e8907ed 100644
--- a/etherdump/commands/common.py
+++ b/etherdump/commands/common.py
@@ -77,3 +77,34 @@ def progressbar (i, num, label="", file=sys.stderr):
     msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)
     sys.stderr.write(msg.encode("utf-8"))
     sys.stderr.flush()
+
+
+
+# The unescape() function below follows one published by Python developer Fredrik Lundh (author of ElementTree, among other things) on his website; it handles decimal, hex and named entities.
+import re, htmlentitydefs
+##
+# Replaces HTML or XML character references and named entities in a text
+# string with the characters they stand for; anything that cannot be
+# decoded is left in the text unchanged.
+#
+# @param text The HTML (or XML) source text.
+# @return The text with every decodable reference replaced.
+def unescape(text):
+    def fixup(m):
+        ref = m.group(0)
+        try:
+            if ref[:2] != "&#":
+                # named entity, e.g. &amp;
+                return unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
+            if ref[2] in "xX":
+                # hexadecimal reference; HTML accepts both &#x and &#X
+                return unichr(int(ref[3:-1], 16))
+            # decimal reference
+            return unichr(int(ref[2:-1]))
+        except (KeyError, ValueError, OverflowError):
+            # unknown name, malformed digits, or a number unichr rejects
+            # (OverflowError when the digit run is too long for a C int)
+            return ref  # leave as is
+    # raw string so \w reaches the regex engine untouched
+    return re.sub(r"&#?\w+;", fixup, text)
+
diff --git a/etherdump/commands/pull.py b/etherdump/commands/pull.py
index 4f7031c..0a3c796 100644
--- a/etherdump/commands/pull.py
+++ b/etherdump/commands/pull.py
@@ -10,7 +10,8 @@ from time import sleep
 from html5tidy import html5tidy
 import html5lib
 from xml.etree import ElementTree as ET 
-
+# debugging
+# import ElementTree as ET 
 
 """
 pull(meta):
@@ -94,7 +95,7 @@ storing enough information to reconstruct (or understand an error occurred)
                 meta['padid'] = padid.encode("utf-8")
                 versions = meta["versions"] = []
                 versions.append({
-                    "url": padurlbase + padid.encode("utf-8"),
+                    "url": padurlbase + quote(padid.encode("utf-8")), # this quote was really important for dealing with rogue chars like \xa0 in a padid;
                     "type": "pad",
                     "code": 200
                 })
@@ -174,11 +175,11 @@ storing enough information to reconstruct (or understand an error occurred)
                 html = html['data']['html']
                 ver["path"] = p+".diff.html"
                 ver["url"] = quote(ver["path"])
-                doc = html5lib.parse(html, namespaceHTMLElements=False)
+                doc = html5lib.parse(html.encode("utf-8"), encoding="utf-8", namespaceHTMLElements=False)
                 html5tidy(doc, indent=True, title=padid, scripts="../versions.js", links=links)
                 with open(ver["path"], "w") as f:
                     # f.write(html.encode("utf-8"))
-                    print (ET.tostring(doc, method="html", encoding="unicode").encode("utf-8"), file=f)
+                    print(ET.tostring(doc, method="html", encoding="utf-8"), file=f)
 
         # Process text, html, dhtml, all options
         if args.all or args.html:
@@ -190,12 +191,11 @@ storing enough information to reconstruct (or understand an error occurred)
                 html = html['data']['html']
                 ver["path"] = p+".raw.html"
                 ver["url"] = quote(ver["path"])
-
                 doc = html5lib.parse(html, namespaceHTMLElements=False)
                 html5tidy(doc, indent=True, title=padid, scripts="../versions.js", links=links)
                 with open(ver["path"], "w") as f:
                     # f.write(html.encode("utf-8"))
-                    print (ET.tostring(doc, method="html", encoding="unicode").encode("utf-8"), file=f)
+                    print (ET.tostring(doc, method="html", encoding="utf-8"), file=f)
 
         # output meta
         if args.all or args.meta: