Browse Source

updated

add-quote-import
Michael Murtaugh 9 years ago
parent
commit
d3732a1aee
  1. 0
      et_helpers.py
  2. 12
      etherdump

0
trim.py → et_helpers.py

12
etherdump

@ -7,11 +7,11 @@ from datetime import datetime
from xml.etree import cElementTree as ET from xml.etree import cElementTree as ET
from urllib import urlencode from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError from urllib2 import urlopen, HTTPError, URLError
# dependencies
import html5lib, jinja2
# local mods # local mods
from trim import trim_removed_spans, contents, set_text_contents, text_contents from et_helpers import trim_removed_spans, contents, set_text_contents, text_contents
from linkify import linkify, urlify, filename_to_padid from linkify import linkify, urlify, filename_to_padid
# external dependencies (use pip to install these)
import html5lib, jinja2
def get_template_env (tpath=None): def get_template_env (tpath=None):
@ -100,7 +100,6 @@ while len(todo) > 0:
os.makedirs(args.path) os.makedirs(args.path)
except OSError: except OSError:
pass pass
# print ("{0}".format(padid).encode("utf-8"), file=sys.stderr)
# _ # _
# _ __ ___ ___| |_ __ _ # _ __ ___ ___| |_ __ _
@ -184,6 +183,9 @@ while len(todo) > 0:
if args.spider: if args.spider:
for l in links: for l in links:
if l not in todo and l not in done: if l not in todo and l not in done:
if l.startswith("http://") or l.startswith("https://"):
print ("Ignoring absolute URL in [[ link ]] form", file=sys.stderr)
continue
# if args.verbose: # if args.verbose:
# print (" link: {0}".format(l), file=sys.stderr) # print (" link: {0}".format(l), file=sys.stderr)
todo.append(l) todo.append(l)
@ -237,8 +239,6 @@ while len(todo) > 0:
a.remove(br); a.insert(1, br) a.remove(br); a.insert(1, br)
img.attrib['src'] = href img.attrib['src'] = href
# extract the style tag (with authorship colors) # extract the style tag (with authorship colors)
style = t.find(".//style") style = t.find(".//style")
if style != None: if style != None:

Loading…
Cancel
Save