updated
This commit is contained in:
parent
f30aafb5c7
commit
d3732a1aee
12
etherdump
12
etherdump
@ -7,11 +7,11 @@ from datetime import datetime
|
||||
from xml.etree import cElementTree as ET
|
||||
from urllib import urlencode
|
||||
from urllib2 import urlopen, HTTPError, URLError
|
||||
# dependencies
|
||||
import html5lib, jinja2
|
||||
# local mods
|
||||
from trim import trim_removed_spans, contents, set_text_contents, text_contents
|
||||
from et_helpers import trim_removed_spans, contents, set_text_contents, text_contents
|
||||
from linkify import linkify, urlify, filename_to_padid
|
||||
# external dependencies (use pip to install these)
|
||||
import html5lib, jinja2
|
||||
|
||||
|
||||
def get_template_env (tpath=None):
|
||||
@ -100,7 +100,6 @@ while len(todo) > 0:
|
||||
os.makedirs(args.path)
|
||||
except OSError:
|
||||
pass
|
||||
# print ("{0}".format(padid).encode("utf-8"), file=sys.stderr)
|
||||
|
||||
# _
|
||||
# _ __ ___ ___| |_ __ _
|
||||
@ -184,6 +183,9 @@ while len(todo) > 0:
|
||||
if args.spider:
|
||||
for l in links:
|
||||
if l not in todo and l not in done:
|
||||
if l.startswith("http://") or l.startswith("https://"):
|
||||
print ("Ignoring absolute URL in [[ link ]] form", file=sys.stderr)
|
||||
continue
|
||||
# if args.verbose:
|
||||
# print (" link: {0}".format(l), file=sys.stderr)
|
||||
todo.append(l)
|
||||
@ -237,8 +239,6 @@ while len(todo) > 0:
|
||||
a.remove(br); a.insert(1, br)
|
||||
img.attrib['src'] = href
|
||||
|
||||
|
||||
|
||||
# extract the style tag (with authorship colors)
|
||||
style = t.find(".//style")
|
||||
if style != None:
|
||||
|
Loading…
Reference in New Issue
Block a user