"""Pumping pads as files into publishing frameworks!"""

import json
import os
import re
import sys
from html.entities import name2codepoint
from time import sleep
from urllib.parse import quote_plus, unquote_plus
from urllib.request import HTTPError, urlopen

import trio

groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname(padid):
    m = groupnamepat.match(padid)
    if m:
        return (m.group(1), padid[m.end() :])
    else:
        return ("", padid)


def padurl(padid):
    return padid


def padpath(padid, pub_path="", group_path="", normalize=False):
    g, p = splitpadname(padid)
    p = quote_plus(p)
    if normalize:
        p = p.replace(" ", "_")
        p = p.replace("(", "")
        p = p.replace(")", "")
        p = p.replace("?", "")
        p = p.replace("'", "")
    if g:
        return os.path.join(group_path, g, p)
    else:
        return os.path.join(pub_path, p)
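
# Usage sketch (paths shown for a POSIX filesystem; pad names are made up):
#   padpath("g.x7f2$notes", group_path="g")  -> "g/x7f2/notes"
#   padpath("start here", pub_path="p")      -> "p/start+here"   (via quote_plus)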


def padpath2id(path):
    dd, p = os.path.split(path)
    gname = dd.split("/")[-1]
    p = unquote_plus(p)
    if gname:
        return "{0}${1}".format(gname, p)
    else:
        return p
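
# Usage sketch (roughly the inverse of padpath; note that the Etherpad "g."
# group prefix is not restored):
#   padpath2id("x7f2/meeting+notes")  -> "x7f2$meeting notes"
#   padpath2id("meeting+notes")       -> "meeting notes"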


def getjson(url, max_retry=3, retry_sleep_time=3):
    ret = {}
    ret["_retries"] = 0
    while ret["_retries"] <= max_retry:
        try:
            f = urlopen(url)
            data = f.read()
            data = data.decode("utf-8")
            rurl = f.geturl()
            f.close()
            ret.update(json.loads(data))
            ret["_code"] = f.getcode()
            if rurl != url:
                ret["_url"] = rurl
            return ret
        except ValueError as e:
            # e.g. a schemeless URL: retry the request against localhost
            url = "http://localhost" + url
        except HTTPError as e:
            print("HTTPError {0}".format(e), file=sys.stderr)
            ret["_code"] = e.code
        ret["_retries"] += 1
        if retry_sleep_time:
            sleep(retry_sleep_time)
    return ret
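
# Usage sketch (endpoint and API key are illustrative, not part of this module):
#   getjson("http://localhost:9001/api/1.2.9/listAllPads?apikey=...")
# A schemeless URL such as "/api/1.2.9/listAllPads?apikey=..." makes urlopen
# raise ValueError, so the call is retried with "http://localhost" prefixed.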


async def agetjson(session, url):
    """The asynchronous version of getjson.

    `session` is expected to provide an awaitable ``get()`` whose response
    has a ``json()`` method and ``status_code``/``url`` attributes (e.g. an
    asks.Session run under trio).
    """
    RETRY = 20
    TIMEOUT = 10
    ret = {}
    ret["_retries"] = 0
    try:
        response = await session.get(url, timeout=TIMEOUT, retries=RETRY)
        rurl = response.url
        ret.update(response.json())
        ret["_code"] = response.status_code
        if rurl != url:
            ret["_url"] = rurl
        return ret
    except Exception as e:
        print("Failed to download {}, saw {}".format(url, str(e)))
        return
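
# Usage sketch (assumes the asks library with its trio backend; the session
# setup and URL list are illustrative):
#   async def fetch_all(urls):
#       session = asks.Session(connections=5)
#       return [await agetjson(session, url) for url in urls]
#   trio.run(fetch_all, urls)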


def loadpadinfo(p):
    with open(p) as f:
        info = json.load(f)
        if "localapiurl" not in info:
            info["localapiurl"] = info.get("apiurl")
    return info
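
# Usage sketch (file name and keys are illustrative; it assumes a padinfo JSON
# file holding the Etherpad API URL and API key):
#   info = loadpadinfo("padinfo.json")
#   pads = getjson(info["localapiurl"] + "listAllPads?apikey=" + info["apikey"])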


# Python developer Fredrik Lundh (author of ElementTree, among other things)
# published a function like this on his website; it handles decimal, hex and
# named entities:
##
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return chr(int(text[3:-1], 16))
                else:
                    return chr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
                text = chr(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)


def istty():
    return sys.stdout.isatty() and os.environ.get("TERM") != "dumb"


def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i : i + n]
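
# Example: list(chunks([1, 2, 3, 4, 5], 2))  -> [[1, 2], [3, 4], [5]]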