removed external routines, now part of etherdump script itself
This commit is contained in:
parent
d89c5dbd3c
commit
33258cdcb1
@ -1,63 +0,0 @@
|
||||
from __future__ import print_function
|
||||
import html5lib, sys, re
|
||||
from xml.etree import cElementTree as ET
|
||||
|
||||
|
||||
def contents (element, method="html"):
|
||||
return (element.text or '') + ''.join([ET.tostring(c, method=method) for c in element])
|
||||
|
||||
def text_contents (element):
    """Recursively collect all text of *element*: its text, every child's
    text (including the children's tails), and its own tail."""
    pieces = [element.text or '']
    for child in element:
        pieces.append(text_contents(child))
    pieces.append(element.tail or '')
    return ''.join(pieces)
|
||||
|
||||
def set_text_contents (element, text):
    """Set the text of the innermost singly-wrapped descendant.

    Descends while the current element has exactly one child, then assigns
    *text* to that element's .text. As the original author noted, this is
    not fully general — it only handles chains of single wrappers.
    """
    target = element
    while len(target) == 1:
        target = target[0]
    target.text = text
|
||||
|
||||
def iterparent(tree):
    """Yield (parent, child) for every parent/child edge in *tree*,
    in document order of the parents."""
    return ((parent, child) for parent in tree.iter() for child in parent)
|
||||
|
||||
def get_parent(tree, elt):
    """Return the direct parent of *elt* within *tree*, or None if *elt*
    is the root or not present.

    Linear scan: ElementTree keeps no parent pointers, so we walk every
    element and look for *elt* among its children.
    """
    for candidate in tree.iter():
        if any(child == elt for child in candidate):
            return candidate
    return None
|
||||
|
||||
def remove_recursive (tree, elt):
    """Remove *elt* from *tree*, then recursively remove any ancestor that
    the removal left empty (no children and no non-whitespace text).

    No-op if *elt* has no parent in *tree* (root or detached).
    """
    p = get_parent(tree, elt)
    # `is not None` rather than bare truthiness: an Element with no children
    # is falsy, and Element.__bool__ is deprecated — testing identity against
    # None is the correct check for "parent found".
    if p is not None:
        p.remove(elt)
        if len(p) == 0 and (p.text is None or p.text.strip() == ""):
            # print ("empty parent", p, file=sys.stderr)
            # the parent is now empty: prune it too
            remove_recursive(tree, p)
|
||||
|
||||
|
||||
def trim_removed_spans (t):
    """Strip every <span class="removed"> (and any container left empty by
    that) from parsed HTML tree *t*, then drop leading <br> tags from body.

    Mutates *t* in place.
    """
    # remove <span class="removed"> and empty parents
    for n in t.findall(".//span[@class='removed']"):
        remove_recursive(t, n)
    # then strip any leading br's from body; guard against the body being
    # absent, emptied, or pruned away entirely by remove_recursive — the
    # unguarded t.find("./body")[0] would raise IndexError/TypeError there
    body = t.find("./body")
    while body is not None and len(body) and body[0].tag == "br":
        remove_recursive(t, body[0])
|
||||
|
||||
def trim_removed_spans_src (src):
    """Parse HTML source *src*, strip <span class="removed"> elements (and
    resulting empty containers) plus leading body <br>s, and return the
    body's inner markup as a string."""
    tree = html5lib.parse(src, namespaceHTMLElements=False)
    trim_removed_spans(tree)
    body = tree.find("./body")
    return contents(body)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
src = sys.stdin.read()
|
||||
# t = html5lib.parse(src, namespaceHTMLElements=False)
|
||||
# trim_rems_tree(t)
|
||||
# print (ET.tostring(t))
|
||||
print (trim_removed_spans_src(src).encode("utf-8"))
|
||||
|
49
linkify.py
49
linkify.py
@ -1,49 +0,0 @@
|
||||
from __future__ import print_function
|
||||
import re, sys
|
||||
|
||||
|
||||
# Compiled once at module import; DOTALL so tags whose attributes wrap
# across a newline are still matched (plain `.` stops at line ends).
_TAG_RE = re.compile(r"<.*?>", re.DOTALL)

def strip_tags (text):
    """Return *text* with anything that looks like an HTML/XML tag removed.

    Non-greedy, regex-based stripping — fine for the simple markup this
    script handles, not a full HTML parser.
    """
    return _TAG_RE.sub("", text)
|
||||
|
||||
def urlify (t, ext=".html"):
|
||||
return t.replace(" ", "_") + ext
|
||||
|
||||
def filename_to_padid (t):
    """Inverse of urlify for .html filenames: underscores back to spaces,
    then a single trailing ".html" (if any) is dropped."""
    name = t.replace("_", " ")
    if name.endswith(".html"):
        name = name[:-len(".html")]
    return name
|
||||
|
||||
def normalize_pad_name (n):
    """Canonicalize a pad name: drop any query string ('?...') and then any
    path suffix ('/...', e.g. '/timeslider') — etherpad ignores both.

    str.partition returns the whole string when the separator is absent,
    so no membership check is needed.
    """
    base = n.partition('?')[0]
    base = base.partition('/')[0]
    return base
|
||||
|
||||
def linkify (src, urlify=urlify):
    """Rewrite every [[wiki link]] in *src* as an inline <a class="wikilink">.

    The link text is tag-stripped and normalized (question-mark query
    strings and slash suffixes are ignored by etherpad, so they are removed
    by normalize_pad_name). Returns (new_src, names): the rewritten source
    and the list of normalized pad names in order of appearance.
    """
    names = []

    def replace (match):
        label = normalize_pad_name(strip_tags(match.group(1)))
        names.append(label)
        href = urlify(label)
        return "[[<a class=\"wikilink\" href=\"{0}\">{1}</a>]]".format(href, label)

    rewritten = re.sub(r"\[\[(.+?)\]\]", replace, src)
    return (rewritten, names)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
src = sys.stdin.read()
|
||||
src, links = linkify(src)
|
||||
|
||||
for l in links:
|
||||
print (l)
|
||||
|
||||
print (src)
|
Loading…
Reference in New Issue
Block a user