etherpump/etherdump/commands/common.py

from __future__ import print_function
import re, os, json, sys
from urllib import quote_plus, unquote_plus
from math import ceil, floor
from urllib2 import urlopen, HTTPError
from time import sleep


# Matches the "g.<group>$" prefix at the start of a group pad id.
groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname(padid):
    """Split a pad id into a ``(group, name)`` tuple.

    When *padid* starts with ``g.<group>$``, the ``<group>`` part and the
    text following the ``$`` are returned.  Otherwise the group is the
    empty string and the name is *padid* unchanged.
    """
    found = groupnamepat.match(padid)
    if found is None:
        return (u"", padid)
    return (found.group(1), padid[found.end():])

def padurl(padid):
    """Return *padid* unchanged; the pad id is used as-is."""
    return padid

def padpath(padid, pub_path=u"", group_path=u""):
    """Build the file path for a pad id.

    The id is split with splitpadname() and the name part is quoted with
    quote_plus().  A pad with a group is placed at
    ``group_path/<group>/<quoted name>``; any other pad is placed at
    ``pub_path/<quoted name>``.
    """
    group, name = splitpadname(padid)
    # unicode values are turned into UTF-8 byte strings before the name is
    # quoted and the parts are joined.
    if type(group) == unicode:
        group = group.encode("utf-8")
    if type(name) == unicode:
        name = name.encode("utf-8")
    quoted = quote_plus(name)
    if not group:
        return os.path.join(pub_path, quoted)
    return os.path.join(group_path, group, quoted)

def padpath2id(path):
    """Turn a pad file path back into a pad id (returned as unicode).

    The file name is unquoted with unquote_plus().  If the path has a
    directory part, the last ``/``-separated component of that directory is
    used as the group and the result is ``<group>$<name>``; otherwise the
    result is just ``<name>``.
    """
    if type(path) == unicode:
        path = path.encode("utf-8")
    parent, leaf = os.path.split(path)
    name = unquote_plus(leaf)
    group = parent.split("/")[-1]
    if not group:
        return name.decode("utf-8")
    return "{0}${1}".format(group, name).decode("utf-8")

def getjson(url, max_retry=3, retry_sleep_time=0.5):
    """Fetch *url* and return its JSON object merged into a result dict.

    The request is attempted up to ``max_retry + 1`` times.  Only an
    HTTPError triggers a retry; any other exception propagates to the
    caller.

    The returned dict contains the members of the decoded JSON object plus
    bookkeeping keys: ``_retries`` (number of failed attempts), ``_code``
    (status code of the last attempt) and, when the URL reported by the
    response differs from *url*, ``_url``.  If every attempt fails, only
    ``_retries`` and ``_code`` are present.

    :param url: URL to request.
    :param max_retry: number of retries allowed after the first attempt.
    :param retry_sleep_time: seconds to sleep after each failed attempt;
        a falsy value disables sleeping.
    :return: the result dict described above.
    """
    ret = {"_retries": 0}
    while ret["_retries"] <= max_retry:
        try:
            f = urlopen(url)
            # Close the response even when reading it fails part-way, so a
            # broken transfer does not leave the connection open.
            try:
                data = f.read()
                rurl = f.geturl()
                code = f.getcode()
            finally:
                f.close()
            ret.update(json.loads(data))
            # Set after the merge so these keys always describe the request.
            ret["_code"] = code
            if rurl != url:
                ret["_url"] = rurl
            return ret
        except HTTPError as e:
            print("HTTPError {0}".format(e), file=sys.stderr)
            ret["_code"] = e.code
            ret["_retries"] += 1
            if retry_sleep_time:
                sleep(retry_sleep_time)
    return ret

def loadpadinfo(p):
    """Open the file at path *p* and return its decoded JSON content."""
    with open(p) as infile:
        return json.load(infile)

def progressbar(i, num, label="", file=sys.stderr):
    """Draw a one-line, 20-character text progress bar.

    The line starts with a carriage return, so successive calls overwrite
    one another.  It is written UTF-8 encoded to *file*, which is then
    flushed.

    :param i: zero-based index of the current item (shown as ``i + 1``).
    :param num: total number of items; when it is 0 an empty bar is drawn
        instead of dividing by zero.
    :param label: text shown after the counter.
    :param file: stream to write to (defaults to sys.stderr).
    """
    fraction = float(i) / num if num else 0.0
    bars = int(ceil(fraction * 20))
    bar = ("*" * bars) + ("-" * (20 - bars))
    msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label)
    # Write to the stream chosen by the caller.
    file.write(msg.encode("utf-8"))
    file.flush()


# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
import re, htmlentitydefs
##
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
    """Replace HTML/XML character references and entities in *text*.

    Decimal (``&#65;``) and hexadecimal (``&#x41;``) character references
    and named entities found in ``htmlentitydefs.name2codepoint`` are
    replaced by the corresponding character.  Anything that cannot be
    resolved (unknown name, malformed number, or a number too large to be
    a character) is left untouched.

    :param text: the HTML (or XML) source text.
    :return: the text with all resolvable references replaced.
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return unichr(int(ref[3:-1], 16))
                return unichr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # Not a valid number, or a value unichr() cannot convert;
                # fall through and keep the reference text.
                pass
        else:
            # named entity
            try:
                return unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)
new pull, new meta style from live constant etherdumpÄ 9 years ago			`from __future__ import print_function`
changes 9 years ago			`import re, os, json, sys`
make file friendliness 9 years ago			`from urllib import quote_plus, unquote_plus`
changes 9 years ago			`from math import ceil, floor`
new pull, new meta style from live constant etherdumpÄ 9 years ago			`from urllib2 import urlopen, HTTPError`
			`from time import sleep`

make file friendliness 9 years ago
			`groupnamepat = re.compile(r"^g\.(\w+)\$")`
			`def splitpadname (padid):`
			`m = groupnamepat.match(padid)`
			`if m:`
			`return(m.group(1), padid[m.end():])`
			`else:`
			`return (u"", padid)`

new 9 years ago			`def padurl (padid, ):`
			`return padid`

make file friendliness 9 years ago			`def padpath (padid, pub_path=u"", group_path=u""):`
			`g, p = splitpadname(padid)`
			`if type(g) == unicode:`
			`g = g.encode("utf-8")`
			`if type(p) == unicode:`
			`p = p.encode("utf-8")`
			`p = quote_plus(p)`
			`# p = p.replace(" ", "_")`
			`# p = p.replace("*", "-")`
			`if g:`
			`return os.path.join(group_path, g, p)`
			`else:`
			`return os.path.join(pub_path, p)`
changes 9 years ago
			`def padpath2id (path):`
			`if type(path) == unicode:`
			`path = path.encode("utf-8")`
			`dd, p = os.path.split(path)`
			`gname = dd.split("/")[-1]`
			`p = unquote_plus(p)`
			`if gname:`
			`return "{0}${1}".format(gname, p).decode("utf-8")`
			`else:`
			`return p.decode("utf-8")`

new pull, new meta style from live constant etherdumpÄ 9 years ago			`def getjson (url, max_retry=3, retry_sleep_time=0.5):`
			`ret = {}`
			`ret["_retries"] = 0`
			`while ret["_retries"] <= max_retry:`
			`try:`
			`f = urlopen(url)`
			`data = f.read()`
			`rurl = f.geturl()`
			`f.close()`
			`ret.update(json.loads(data))`
			`ret["_code"] = f.getcode()`
			`if rurl != url:`
			`ret["_url"] = rurl`
			`return ret`
			`except HTTPError as e:`
			`print ("HTTPError {0}".format(e), file=sys.stderr)`
			`ret["_code"] = e.code`
			`ret["_retries"]+=1`
			`if retry_sleep_time:`
			`sleep(retry_sleep_time)`
			`return ret`
changes 9 years ago
			`def loadpadinfo(p):`
			`with open(p) as f:`
			`info = json.load(f)`
			`return info`

			`def progressbar (i, num, label="", file=sys.stderr):`
			`p = float(i) / num`
			`percentage = int(floor(p*100))`
			`bars = int(ceil(p*20))`
			`bar = ("*"*bars) + ("-"*(20-bars))`
			`msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)`
			`sys.stderr.write(msg.encode("utf-8"))`
			`sys.stderr.flush()`
fixed vital encoding error in pad urls 9 years ago


			`# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:`
			`import re, htmlentitydefs`
			`##`
			`# Removes HTML or XML character references and entities from a text string.`
			`#`
			`# @param text The HTML (or XML) source text.`
			`# @return The plain text, as a Unicode string, if necessary.`
			`def unescape(text):`
			`def fixup(m):`
			`text = m.group(0)`
			`if text[:2] == "&#":`
			`# character reference`
			`try:`
			`if text[:3] == "&#x":`
			`return unichr(int(text[3:-1], 16))`
			`else:`
			`return unichr(int(text[2:-1]))`
			`except ValueError:`
			`pass`
			`else:`
			`# named entity`
			`try:`
			`text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])`
			`except KeyError:`
			`pass`
			`return text # leave as is`
			`return re.sub("&#?\w+;", fixup, text)`