varia.website/venv/lib/python3.11/site-packages/mdurl/_decode.py

from __future__ import annotations

from collections.abc import Sequence
import functools
import re

# Default value of the `exclude` argument of `decode`: escapes that resolve to
# one of these characters are kept in percent-encoded form instead of being
# decoded.
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
# Empty exclusion set. Not referenced in this module; presumably meant to be
# passed as `exclude` so that no character stays percent-encoded.
DECODE_COMPONENT_CHARS = ""

# Memoized lookup tables, keyed by the `exclude` string they were built for.
decode_cache: dict[str, list[str]] = {}


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the 128-entry ASCII lookup table for the given exclusion set.

    Entry ``n`` of the table is the text that a decoded byte ``n`` should
    produce: the character ``chr(n)`` itself, or its upper-case percent-escape
    (``"%XX"``) when that character appears in *exclude*.

    Tables are memoized in the module-level ``decode_cache`` so each distinct
    *exclude* string is processed only once; repeated calls return the same
    list object.

    Args:
        exclude: Characters that must remain percent-encoded. Characters
            outside the ASCII range are ignored, because the table only
            covers code points 0..127.

    Returns:
        A sequence of 128 strings indexed by byte value.
    """
    cached = decode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [chr(code) for code in range(128)]

    for ch in exclude:
        code = ord(ch)
        # The table has no slot for non-ASCII code points; writing to one
        # would raise IndexError, and such slots are never looked up anyway.
        if code < 128:
            table[code] = f"%{code:02X}"

    # Store the table only once it is complete, so a failure while building
    # it can never leave a half-initialized entry in the cache.
    decode_cache[exclude] = table
    return table


def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode the percent-encoded sequences found in *string*.

    Every maximal run of ``%XX`` escapes (hex digits matched
    case-insensitively) is handed to ``repl_func_with_cache`` and replaced by
    its result. Text outside such runs is returned unchanged.

    Args:
        string: Text that may contain percent-escapes.
        exclude: Characters whose escapes must stay percent-encoded; used to
            select the lookup table via ``get_decode_cache``.

    Returns:
        The string with its escape runs replaced.
    """
    table = get_decode_cache(exclude)

    def expand(run: re.Match) -> str:
        # Bind the lookup table chosen for this call to the shared replacer.
        return repl_func_with_cache(run, cache=table)

    escape_runs = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)
    return escape_runs.sub(expand, string)


def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one run of consecutive ``%XX`` escapes.

    The matched text is read three characters at a time (``%`` plus two hex
    digits), each triple yielding one byte:

    * A byte below 0x80 is replaced by ``cache[byte]``.
    * A UTF-8 lead byte followed by enough continuation bytes (``10xxxxxx``)
      is decoded together with them. If the bytes are not valid UTF-8, one
      U+FFFD is emitted per byte of the sequence.
    * Any other byte (stray continuation byte, invalid lead byte, truncated
      sequence, or a lead byte whose followers are not continuation bytes)
      yields a single U+FFFD and only that one byte is consumed.

    Args:
        match: Regex match whose full text is the run of escapes.
        cache: Lookup table indexed by byte value for bytes below 0x80.

    Returns:
        The decoded replacement text for the whole run.
    """
    escaped = match.group()
    total = len(escaped)
    replacement = "\ufffd"

    # (mask, expected lead bits, sequence length in bytes)
    #   110xxxxx  -> 2 bytes
    #   1110xxxx  -> 3 bytes
    #   11110xxx  -> 4 bytes
    lead_patterns = ((0xE0, 0xC0, 2), (0xF0, 0xE0, 3), (0xF8, 0xF0, 4))

    def octet_at(offset: int) -> int:
        # Value of the escape whose "%" sits at `offset`.
        return int(escaped[offset + 1 : offset + 3], 16)

    pieces: list[str] = []
    pos = 0
    while pos < total:
        lead = octet_at(pos)

        if lead < 0x80:
            pieces.append(cache[lead])
            pos += 3
            continue

        # Default outcome: the byte cannot start a usable sequence.
        decoded = replacement
        consumed = 1

        for mask, bits, width in lead_patterns:
            if (lead & mask) != bits:
                continue
            # The lead patterns are mutually exclusive, so only this entry
            # can apply. Require that all `width` escapes are present.
            if pos + 3 * (width - 1) < total:
                tail = [octet_at(pos + 3 * k) for k in range(1, width)]
                if all((octet & 0xC0) == 0x80 for octet in tail):
                    try:
                        decoded = bytes((lead, *tail)).decode()
                    except UnicodeDecodeError:
                        decoded = replacement * width
                    consumed = width
            break

        pieces.append(decoded)
        pos += 3 * consumed

    return "".join(pieces)
added declarations 2024-11-19 14:01:39 +01:00			`from __future__ import annotations`

			`from collections.abc import Sequence`
			`import functools`
			`import re`

			`DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"`
			`DECODE_COMPONENT_CHARS = ""`

			`decode_cache: dict[str, list[str]] = {}`


			`def get_decode_cache(exclude: str) -> Sequence[str]:`
			`if exclude in decode_cache:`
			`return decode_cache[exclude]`

			`cache: list[str] = []`
			`decode_cache[exclude] = cache`

			`for i in range(128):`
			`ch = chr(i)`
			`cache.append(ch)`

			`for i in range(len(exclude)):`
			`ch_code = ord(exclude[i])`
			`cache[ch_code] = "%" + ("0" + hex(ch_code)[2:].upper())[-2:]`

			`return cache`


			`# Decode percent-encoded string.`
			`#`
			`def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:`
			`cache = get_decode_cache(exclude)`
			`repl_func = functools.partial(repl_func_with_cache, cache=cache)`
			`return re.sub(r"(%[a-f0-9]{2})+", repl_func, string, flags=re.IGNORECASE)`


			`def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:`
			`seq = match.group()`
			`result = ""`

			`i = 0`
			`l = len(seq) # noqa: E741`
			`while i < l:`
			`b1 = int(seq[i + 1 : i + 3], 16)`

			`if b1 < 0x80:`
			`result += cache[b1]`
			`i += 3 # emulate JS for loop statement3`
			`continue`

			`if (b1 & 0xE0) == 0xC0 and (i + 3 < l):`
			`# 110xxxxx 10xxxxxx`
			`b2 = int(seq[i + 4 : i + 6], 16)`

			`if (b2 & 0xC0) == 0x80:`
			`all_bytes = bytes((b1, b2))`
			`try:`
			`result += all_bytes.decode()`
			`except UnicodeDecodeError:`
			`result += "\ufffd" * 2`

			`i += 3`
			`i += 3 # emulate JS for loop statement3`
			`continue`

			`if (b1 & 0xF0) == 0xE0 and (i + 6 < l):`
			`# 1110xxxx 10xxxxxx 10xxxxxx`
			`b2 = int(seq[i + 4 : i + 6], 16)`
			`b3 = int(seq[i + 7 : i + 9], 16)`

			`if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80:`
			`all_bytes = bytes((b1, b2, b3))`
			`try:`
			`result += all_bytes.decode()`
			`except UnicodeDecodeError:`
			`result += "\ufffd" * 3`

			`i += 6`
			`i += 3 # emulate JS for loop statement3`
			`continue`

			`if (b1 & 0xF8) == 0xF0 and (i + 9 < l):`
			`# 111110xx 10xxxxxx 10xxxxxx 10xxxxxx`
			`b2 = int(seq[i + 4 : i + 6], 16)`
			`b3 = int(seq[i + 7 : i + 9], 16)`
			`b4 = int(seq[i + 10 : i + 12], 16)`

			`if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80 and (b4 & 0xC0) == 0x80:`
			`all_bytes = bytes((b1, b2, b3, b4))`
			`try:`
			`result += all_bytes.decode()`
			`except UnicodeDecodeError:`
			`result += "\ufffd" * 4`

			`i += 9`
			`i += 3 # emulate JS for loop statement3`
			`continue`

			`result += "\ufffd"`
			`i += 3 # emulate JS for loop statement3`

			`return result`