"""Helpers for rewriting ``[[wikilink]]`` markup and mapping pad names to file names.

Run as a script, the module reads text from stdin, prints every pad name
found inside ``[[...]]`` (one per line) and then prints the rewritten text.
"""
from __future__ import print_function

import re
import sys

# Non-greedy match of anything that looks like a markup tag: "<...>".
_TAG_RE = re.compile(r"<.*?>")
# Non-greedy match of a double-bracket link; group 1 is the text inside.
_WIKILINK_RE = re.compile(r"\[\[(.+?)\]\]")


def strip_tags(text):
    """Return *text* with every ``<...>`` span removed."""
    return _TAG_RE.sub("", text)


def urlify(t, ext=".html"):
    """Return a file name for pad name *t*: spaces become underscores, *ext* is appended."""
    return t.replace(" ", "_") + ext


def filename_to_padid(t, ext=".html"):
    """Return the pad name for file name *t* (the reverse of :func:`urlify`).

    A trailing *ext* is removed and underscores are turned back into spaces.
    The extension is stripped before the underscores are replaced so that an
    extension which itself contains ``_`` is still recognised.
    """
    if ext:
        t = re.sub(re.escape(ext) + r"$", "", t)
    return t.replace("_", " ")


def normalize_pad_name(n):
    """Return *n* truncated at the first ``?`` or ``/``, whichever comes first.

    Question marks are ignored by etherpad, so everything from the first one
    on is dropped; slashes are stripped as well (e.g. a ``/timeslider``
    suffix).
    """
    return n.partition("?")[0].partition("/")[0]


def linkify(src, urlify=urlify):
    """Normalise every ``[[...]]`` link in *src* and collect the pad names.

    For each link the inner text has tags stripped and is passed through
    :func:`normalize_pad_name`; the link is rewritten as ``[[<name>]]``.

    *urlify* is called once per link with the normalised name.

    Returns a ``(rewritten_src, names)`` tuple, where *names* lists the
    normalised pad names in order of appearance (duplicates included).
    """
    collected = []

    def replace_link(match):
        name = normalize_pad_name(strip_tags(match.group(1)))
        collected.append(name)
        link = urlify(name)
        # NOTE(review): `link` is passed to format() but the template only
        # references {1}, so the URL never reaches the output. Possibly the
        # anchor markup was lost at some point -- confirm before changing.
        return "[[{1}]]".format(link, name)

    rewritten = _WIKILINK_RE.sub(replace_link, src)
    return (rewritten, collected)


def main():
    """Read text from stdin; print each collected pad name, then the rewritten text."""
    src, links = linkify(sys.stdin.read())
    for name in links:
        print(name)
    print(src)


if __name__ == "__main__":
    main()