from __future__ import print_function import re, sys def strip_tags (text): return re.sub(r"<.*?>", "", text) def urlify (t): return t.replace(" ", "_") + ".html" def filename_to_padid (t): t = t.replace("_", " ") t = re.sub(r"\.html$", "", t) return t def linkify (src, urlify=urlify): collect = [] def s (m): contents = strip_tags(m.group(1)) collect.append(contents) link = urlify(contents) # link = link.split("?", 1)[0] return "[[{1}]]".format(link, contents) # src = re.sub(r"\[\[([\w_\- ,]+?)\]\]", s, src) ## question marks are ignored by etherpad, so split/strip it src = re.sub(r"\[\[(.+?)(\?.*)?\]\]", s, src) return (src, collect) if __name__ == "__main__": src = sys.stdin.read() src, links = linkify(src) for l in links: print (l) print (src)