Browse Source

updated

add-quote-import
Michael Murtaugh 9 years ago
parent
commit
d3732a1aee
  1. 0
      et_helpers.py
  2. 12
      etherdump

0
trim.py → et_helpers.py

12
etherdump

@@ -7,11 +7,11 @@ from datetime import datetime
from xml.etree import cElementTree as ET
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
# dependencies
import html5lib, jinja2
# local mods
from trim import trim_removed_spans, contents, set_text_contents, text_contents
from et_helpers import trim_removed_spans, contents, set_text_contents, text_contents
from linkify import linkify, urlify, filename_to_padid
# external dependencies (use pip to install these)
import html5lib, jinja2
def get_template_env (tpath=None):
@@ -100,7 +100,6 @@ while len(todo) > 0:
os.makedirs(args.path)
except OSError:
pass
# print ("{0}".format(padid).encode("utf-8"), file=sys.stderr)
# _
# _ __ ___ ___| |_ __ _
@@ -184,6 +183,9 @@ while len(todo) > 0:
if args.spider:
for l in links:
if l not in todo and l not in done:
if l.startswith("http://") or l.startswith("https://"):
print ("Ignoring absolute URL in [[ link ]] form", file=sys.stderr)
continue
# if args.verbose:
# print (" link: {0}".format(l), file=sys.stderr)
todo.append(l)
@@ -237,8 +239,6 @@ while len(todo) > 0:
a.remove(br); a.insert(1, br)
img.attrib['src'] = href
# extract the style tag (with authorship colors)
style = t.find(".//style")
if style != None:

Loading…
Cancel
Save