updated
This commit is contained in:
parent
f30aafb5c7
commit
d3732a1aee
12
etherdump
12
etherdump
@ -7,11 +7,11 @@ from datetime import datetime
|
|||||||
from xml.etree import cElementTree as ET
|
from xml.etree import cElementTree as ET
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urllib2 import urlopen, HTTPError, URLError
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
# dependencies
|
|
||||||
import html5lib, jinja2
|
|
||||||
# local mods
|
# local mods
|
||||||
from trim import trim_removed_spans, contents, set_text_contents, text_contents
|
from et_helpers import trim_removed_spans, contents, set_text_contents, text_contents
|
||||||
from linkify import linkify, urlify, filename_to_padid
|
from linkify import linkify, urlify, filename_to_padid
|
||||||
|
# external dependencies (use pip to install these)
|
||||||
|
import html5lib, jinja2
|
||||||
|
|
||||||
|
|
||||||
def get_template_env (tpath=None):
|
def get_template_env (tpath=None):
|
||||||
@ -100,7 +100,6 @@ while len(todo) > 0:
|
|||||||
os.makedirs(args.path)
|
os.makedirs(args.path)
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
# print ("{0}".format(padid).encode("utf-8"), file=sys.stderr)
|
|
||||||
|
|
||||||
# _
|
# _
|
||||||
# _ __ ___ ___| |_ __ _
|
# _ __ ___ ___| |_ __ _
|
||||||
@ -184,6 +183,9 @@ while len(todo) > 0:
|
|||||||
if args.spider:
|
if args.spider:
|
||||||
for l in links:
|
for l in links:
|
||||||
if l not in todo and l not in done:
|
if l not in todo and l not in done:
|
||||||
|
if l.startswith("http://") or l.startswith("https://"):
|
||||||
|
print ("Ignoring absolute URL in [[ link ]] form", file=sys.stderr)
|
||||||
|
continue
|
||||||
# if args.verbose:
|
# if args.verbose:
|
||||||
# print (" link: {0}".format(l), file=sys.stderr)
|
# print (" link: {0}".format(l), file=sys.stderr)
|
||||||
todo.append(l)
|
todo.append(l)
|
||||||
@ -237,8 +239,6 @@ while len(todo) > 0:
|
|||||||
a.remove(br); a.insert(1, br)
|
a.remove(br); a.insert(1, br)
|
||||||
img.attrib['src'] = href
|
img.attrib['src'] = href
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# extract the style tag (with authorship colors)
|
# extract the style tag (with authorship colors)
|
||||||
style = t.find(".//style")
|
style = t.find(".//style")
|
||||||
if style != None:
|
if style != None:
|
||||||
|
Loading…
Reference in New Issue
Block a user