etherpump/etherdump/commands/common.py

129 lines
3.7 KiB
Python
Raw Normal View History

from __future__ import print_function
2015-11-24 13:48:03 +01:00
import re, os, json, sys
from math import ceil, floor
from time import sleep
2018-01-12 14:42:55 +01:00
try:
# python2
from urlparse import urlparse, urlunparse
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
from urllib import quote_plus, unquote_plus
from htmlentitydefs import name2codepoint
input = raw_input
except ImportError:
# python3
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus
from urllib.request import urlopen, URLError, HTTPError
from html.entities import name2codepoint
2015-11-13 11:03:57 +01:00
# Matches the "g.<groupid>$" prefix that marks a group pad id.
groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname(padid):
    """Split a pad id into a ``(group, name)`` pair.

    When ``padid`` starts with ``g.<group>$`` the group id (without the
    ``g.`` prefix and the ``$``) and the remainder of the id are returned.
    Otherwise the group is the empty string and the name is ``padid``
    unchanged.
    """
    found = groupnamepat.match(padid)
    if found is None:
        return (u"", padid)
    group = found.group(1)
    name = padid[found.end():]
    return (group, name)
2015-12-04 17:17:32 +01:00
def padurl(padid):
    """Return the URL component for a pad; currently the pad id unchanged."""
    url = padid
    return url
2018-01-12 14:42:55 +01:00
def padpath(padid, pub_path=u"", group_path=u"", normalize=False):
    """Map a pad id to a relative file path.

    The id is split with ``splitpadname``; the pad name is URL-quoted with
    ``quote_plus`` so it is safe as a file name.

    :param padid: pad id, optionally prefixed with ``g.<group>$``.
    :param pub_path: directory used for pads without a group.
    :param group_path: directory under which per-group folders live.
    :param normalize: when true, spaces become ``_`` and the characters
        ``( ) ? '`` are removed from the pad name before quoting.
    :return: ``group_path/<group>/<name>`` for group pads, otherwise
        ``pub_path/<name>``.
    """
    g, p = splitpadname(padid)
    if normalize:
        # This must happen BEFORE quoting: quote_plus() rewrites a space to
        # "+" and percent-encodes ( ) ? ', so replacing them afterwards can
        # never match and the flag would silently do nothing.
        p = p.replace(" ", "_")
        for unwanted in "()?'":
            p = p.replace(unwanted, "")
    p = quote_plus(p)
    if g:
        return os.path.join(group_path, g, p)
    return os.path.join(pub_path, p)
2015-11-24 13:48:03 +01:00
def padpath2id(path):
    """Turn a file path produced for a pad back into a pad id.

    The last path component is URL-unquoted to give the pad name; the name
    of its parent directory, if any, is used as the group and joined to the
    pad name with ``$``.

    :param path: path as text or UTF-8 encoded bytes.
    :return: the pad id as a text (unicode) string.
    """
    # On Python 2 ``str`` is the byte string type; unquoting must then be
    # done on UTF-8 bytes and the result decoded. On Python 3 everything is
    # done on text. (The original referenced ``unicode`` and ``.decode`` on
    # ``str``, which fails on Python 3.)
    py2 = str is bytes
    if py2:
        if not isinstance(path, str):
            path = path.encode("utf-8")
    elif isinstance(path, bytes):
        path = path.decode("utf-8")

    dd, p = os.path.split(path)
    gname = dd.split("/")[-1]
    p = unquote_plus(p)
    padid = "{0}${1}".format(gname, p) if gname else p
    if py2:
        padid = padid.decode("utf-8")
    return padid
def getjson(url, max_retry=3, retry_sleep_time=0.5):
    """Fetch ``url`` and return its JSON body as a dict with extra metadata.

    The returned dict holds the decoded JSON object plus:

    * ``_retries`` -- number of failed attempts caused by ``HTTPError``;
    * ``_code``    -- status code of the response, or of the last
      ``HTTPError`` when every attempt failed;
    * ``_url``     -- the final URL, only when it differs from the one
      requested.

    :param url: URL to fetch. If ``urlopen`` rejects it with ``ValueError``
        (for example because it has no scheme), ``http://localhost`` is
        prepended once and the request is retried.
    :param max_retry: number of additional attempts after an ``HTTPError``.
    :param retry_sleep_time: seconds to sleep between attempts (falsy
        disables sleeping).
    :raises ValueError: if the response body is not valid UTF-8 JSON.
    """
    ret = {}
    ret["_retries"] = 0
    tried_localhost = False
    while ret["_retries"] <= max_retry:
        try:
            f = urlopen(url)
        except ValueError:
            # Only a URL problem is handled here. Decoding errors are also
            # ValueErrors, so parsing is kept outside this try block to
            # avoid mangling a perfectly good URL.
            if tried_localhost:
                raise
            tried_localhost = True
            url = "http://localhost" + url
            continue
        except HTTPError as e:
            print("HTTPError {0}".format(e), file=sys.stderr)
            ret["_code"] = e.code
            ret["_retries"] += 1
            if retry_sleep_time:
                sleep(retry_sleep_time)
            continue

        # try/finally rather than ``with``: the Python 2 response object is
        # not a context manager.
        try:
            data = f.read().decode("utf-8")
            rurl = f.geturl()
            code = f.getcode()
        finally:
            f.close()

        ret.update(json.loads(data))
        ret["_code"] = code
        if rurl != url:
            ret["_url"] = rurl
        return ret
    return ret
2015-11-24 13:48:03 +01:00
def loadpadinfo(p):
    """Read the JSON file at path ``p`` and return the decoded object."""
    with open(p) as handle:
        return json.load(handle)
def progressbar(i, num, label="", file=sys.stderr):
    """Draw a 20-character text progress bar on a single line.

    The line starts with a carriage return so repeated calls overwrite each
    other on a terminal.

    :param i: zero-based index of the current item (displayed as ``i + 1``).
    :param num: total number of items.
    :param label: text shown after the counter.
    :param file: stream to write to; it must provide ``write`` and
        ``flush``.
    """
    # Guard against an empty job list instead of dividing by zero.
    p = float(i) / num if num else 0.0
    bars = int(ceil(p * 20))
    bar = ("*" * bars) + ("-" * (20 - bars))
    msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label)
    # Honour the ``file`` argument (it used to be ignored in favour of
    # sys.stderr).
    file.write(msg)
    file.flush()
2016-01-15 12:06:21 +01:00
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
##
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
    """Replace HTML/XML character references and named entities in ``text``.

    Handles decimal (``&#65;``), hexadecimal (``&#x41;`` / ``&#X41;``) and
    named (``&amp;``) forms. Anything that cannot be resolved -- an unknown
    name, malformed digits or an out-of-range code point -- is left in the
    output exactly as written.

    :param text: the HTML (or XML) source text.
    :return: the text with resolvable references replaced by characters.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3 has no unichr; chr covers all of Unicode

    def fixup(m):
        entity = m.group(0)
        if entity[:2] == "&#":
            # numeric character reference
            try:
                if entity[:3].lower() == "&#x":
                    return to_char(int(entity[3:-1], 16))
                return to_char(int(entity[2:-1]))
            except (ValueError, OverflowError):
                # bad digits or a code point outside the Unicode range
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[entity[1:-1]])
            except KeyError:
                pass
        return entity  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)