From d3732a1aeefe3af9ffb4280dc50547befd42b065 Mon Sep 17 00:00:00 2001
From: Michael Murtaugh <mm@automatist.org>
Date: Tue, 25 Aug 2015 12:44:16 +0200
Subject: [PATCH] Rename trim.py to et_helpers.py; skip absolute URLs when spidering

---
 trim.py => et_helpers.py |  0
 etherdump                | 12 ++++++------
 2 files changed, 6 insertions(+), 6 deletions(-)
 rename trim.py => et_helpers.py (100%)

diff --git a/trim.py b/et_helpers.py
similarity index 100%
rename from trim.py
rename to et_helpers.py
diff --git a/etherdump b/etherdump
index a239b64..7133ce6 100755
--- a/etherdump
+++ b/etherdump
@@ -7,11 +7,11 @@ from datetime import datetime
 from xml.etree import cElementTree as ET 
 from urllib import urlencode
 from urllib2 import urlopen, HTTPError, URLError
-# dependencies
-import html5lib, jinja2
 # local mods
-from trim import trim_removed_spans, contents, set_text_contents, text_contents
+from et_helpers import trim_removed_spans, contents, set_text_contents, text_contents
 from linkify import linkify, urlify, filename_to_padid
+# external dependencies (use pip to install these)
+import html5lib, jinja2
 
 
 def get_template_env (tpath=None):
@@ -100,7 +100,6 @@ while len(todo) > 0:
             os.makedirs(args.path)
         except OSError:
             pass
-    # print ("{0}".format(padid).encode("utf-8"), file=sys.stderr)
 
     #                 _        
     #  _ __ ___   ___| |_ __ _ 
@@ -184,6 +183,9 @@ while len(todo) > 0:
     if args.spider:
         for l in links:
             if l not in todo and l not in done:
+                if l.startswith("http://") or l.startswith("https://"):
+                    print ("Ignoring absolute URL in [[ link ]] form", file=sys.stderr)
+                    continue
                 # if args.verbose:
                 #     print ("  link: {0}".format(l), file=sys.stderr)
                 todo.append(l)
@@ -237,8 +239,6 @@ while len(todo) > 0:
                     a.remove(br); a.insert(1, br)
                     img.attrib['src'] = href
 
-
-
     # extract the style tag (with authorship colors)
     style = t.find(".//style")
     if style != None: