etherpump/linkify.py

50 lines
1.1 KiB
Python
Raw Normal View History

2015-07-23 18:09:20 +02:00
from __future__ import print_function
import re, sys
2015-07-30 13:33:39 +02:00
def strip_tags(text):
    """Return *text* with every ``<...>`` span removed.

    The match is non-greedy and ``.`` does not cross newlines, so each
    span ends at the first ``>`` on the same line; a ``<`` with no ``>``
    later on its line is left in place.
    """
    tag_pattern = re.compile(r"<.*?>")
    return tag_pattern.sub("", text)
2015-07-30 13:33:39 +02:00
2015-08-25 11:54:37 +02:00
def urlify(t, ext=".html"):
    """Build a link target from the pad name *t*.

    Every space becomes an underscore and *ext* is appended unchanged.
    """
    pieces = t.split(" ")
    return "_".join(pieces) + ext
2015-07-23 18:09:20 +02:00
2015-07-30 13:33:39 +02:00
def filename_to_padid(t, ext=".html"):
    """Convert a file name produced by ``urlify`` back into a pad name.

    A trailing *ext* is removed and every underscore becomes a space.

    Parameters:
        t: the file name, e.g. ``"My_Pad.html"``.
        ext: the extension to strip; defaults to ``".html"`` so existing
            callers are unaffected. It is matched literally, not as a
            regular expression.

    Returns:
        The pad name, e.g. ``"My Pad"``. A name that does not end in
        *ext* only has its underscores converted.
    """
    # Remove the extension before touching underscores, so an extension
    # that itself contains "_" is still recognised.
    # re.escape keeps characters such as "." literal.
    # "$" also matches just before a single trailing newline, as the
    # original hard-coded pattern did.
    t = re.sub(re.escape(ext) + r"$", "", t)
    return t.replace("_", " ")
def normalize_pad_name(n):
    """Reduce *n* to the bare pad name.

    Everything from the first ``?`` onward is discarded, then everything
    from the first ``/`` onward in what remains.
    """
    for separator in ("?", "/"):
        # partition returns the whole string as the head when the
        # separator is absent, so no membership test is needed.
        n = n.partition(separator)[0]
    return n
2015-07-30 13:33:39 +02:00
2015-07-23 18:09:20 +02:00
def linkify(src, urlify=urlify):
    """Rewrite every ``[[name]]`` wikilink in *src* as an HTML anchor.

    Parameters:
        src: the text to scan for ``[[...]]`` spans.
        urlify: callable that maps a pad name to its link target;
            defaults to the module-level ``urlify``.

    Returns:
        A tuple ``(rewritten_src, names)`` where ``names`` lists the pad
        name of each link in order of appearance (duplicates included).
    """
    anchor_template = '[[<a class="wikilink" href="{0}">{1}</a>]]'
    pad_names = []

    def to_anchor(match):
        # Drop any markup inside the brackets, then cut the name at the
        # first "?" or "/": question marks are ignored by etherpad, and
        # slashes introduce sub-paths such as /timeslider.
        name = normalize_pad_name(strip_tags(match.group(1)))
        pad_names.append(name)
        return anchor_template.format(urlify(name), name)

    rewritten = re.sub(r"\[\[(.+?)\]\]", to_anchor, src)
    return (rewritten, pad_names)
2015-07-23 18:09:20 +02:00
if __name__ == "__main__":
    # Filter mode: read the whole of stdin, print each linked pad name on
    # its own line, then print the rewritten text.
    text = sys.stdin.read()
    text, pad_names = linkify(text)

    for pad_name in pad_names:
        print(pad_name)

    print(text)