etherpump/linkify.py

from __future__ import print_function
import re, sys


def strip_tags(text):
    """Return *text* with every ``<...>`` span removed.

    The match is non-greedy and ``.`` does not cross newlines, so a ``<``
    without a closing ``>`` on the same line is left untouched.
    """
    tag_pattern = re.compile(r"<.*?>")
    return tag_pattern.sub("", text)

def urlify(t, ext=".html"):
    """Turn the name *t* into a link target.

    Every space becomes an underscore and *ext* is appended.
    """
    pieces = t.split(" ")
    return "_".join(pieces) + ext

def filename_to_padid(t):
    """Map a filename back to a pad id.

    Underscores are turned into spaces and a trailing ``.html`` is dropped.
    """
    spaced = t.replace("_", " ")
    return re.sub(r"\.html$", "", spaced)

def normalize_pad_name(n):
    """Cut *n* at the first ``?`` and then at the first ``/``.

    Only the text before those separators is returned; a name containing
    neither is returned unchanged.
    """
    for separator in ("?", "/"):
        # partition() yields the whole string as the head when the
        # separator is absent, so no membership check is needed.
        n = n.partition(separator)[0]
    return n

def linkify(src, urlify=urlify):
    """Replace ``[[name]]`` wikilinks in *src* with HTML anchors.

    For each ``[[...]]`` occurrence the inner text has its tags stripped and
    is normalized with ``normalize_pad_name``; the result is used both as the
    anchor text and, passed through *urlify*, as the ``href``.

    Returns a ``(text, names)`` tuple: the rewritten text and the list of
    normalized names in order of occurrence (duplicates included).
    """
    pad_names = []

    def replace_wikilink(match):
        # Per the original author's note: question marks are ignored by
        # etherpad, and slashes (e.g. "/timeslider") are stripped as well;
        # normalize_pad_name takes care of both.
        pad_name = normalize_pad_name(strip_tags(match.group(1)))
        pad_names.append(pad_name)
        return "[[<a class=\"wikilink\" href=\"{0}\">{1}</a>]]".format(
            urlify(pad_name), pad_name
        )

    wikilink = re.compile(r"\[\[(.+?)\]\]")
    return (wikilink.sub(replace_wikilink, src), pad_names)


if __name__ == "__main__":
    # Filter mode: read text from stdin, print each collected link name on
    # its own line, then print the linkified text.
    html, pad_names = linkify(sys.stdin.read())

    for pad_name in pad_names:
        print(pad_name)

    print(html)
dump html with trim and linkify 2015-07-23 18:09:20 +02:00			`from __future__ import print_function`
			`import re, sys`


updated dump_html to fix links 2015-07-30 13:33:39 +02:00			`def strip_tags (text):`
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`return re.sub(r"<.*?>", "", text)`
updated dump_html to fix links 2015-07-30 13:33:39 +02:00
etherdump working with colors 2015-08-25 11:54:37 +02:00			`def urlify (t, ext=".html"):`
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`return t.replace(" ", "_") + ext`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00
updated dump_html to fix links 2015-07-30 13:33:39 +02:00			`def filename_to_padid (t):`
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`t = t.replace("_", " ")`
			`t = re.sub(r"\.html$", "", t)`
			`return t`

			`def normalize_pad_name (n):`
			`if '?' in n:`
			`n = n.split('?', 1)[0]`
			`if '/' in n:`
			`n = n.split('/', 1)[0]`
			`return n`
updated dump_html to fix links 2015-07-30 13:33:39 +02:00
dump html with trim and linkify 2015-07-23 18:09:20 +02:00			`def linkify (src, urlify=urlify):`

merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`collect = []`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`def s (m):`
			`contents = strip_tags(m.group(1))`
			`contents = normalize_pad_name(contents)`
			`collect.append(contents)`
			`link = urlify(contents)`
			`# link = link.split("?", 1)[0]`
			`return "[[<a class=\"wikilink\" href=\"{0}\">{1}</a>]]".format(link, contents)`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`# src = re.sub(r"\[\[([\w_\- ,]+?)\]\]", s, src)`
			`## question marks are ignored by etherpad, so split/strip it`
			`## strip slashes as well!! (/timeslider)`
			`src = re.sub(r"\[\[(.+?)\]\]", s, src)`
			`return (src, collect)`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00

			`if __name__ == "__main__":`
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`src = sys.stdin.read()`
			`src, links = linkify(src)`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`for l in links:`
			`print (l)`
dump html with trim and linkify 2015-07-23 18:09:20 +02:00
merged roels unicsss.py code 2015-08-27 17:02:05 +02:00			`print (src)`