varia.website/venv/lib/python3.11/site-packages/unidecode/__init__.py


								# -*- coding: utf-8 -*-

								# vi:tabstop=4:expandtab:sw=4

								"""Transliterate Unicode text into plain 7-bit ASCII.


								Example usage:


								>>> from unidecode import unidecode

								>>> unidecode("\u5317\u4EB0")

								'Bei Jing '


								The transliteration uses a straightforward map, and doesn't have alternatives

								for the same character based on language, position, or anything else.


								A standard string object will be returned. If you need bytes, use:


								>>> unidecode("Κνωσός").encode("ascii")

								b'Knosos'

								"""

								import warnings

								from typing import Dict, Optional, Sequence


								# Lazily filled cache of replacement tables, keyed by section number
								# (codepoint >> 8). A value of None records that importing the table
								# module for that section failed, so the import is not retried.
								Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]


								class UnidecodeError(ValueError):
								    """Raised for Unidecode-related errors.

								    The ``index`` attribute holds the index of the character that caused
								    the error, or None when no index was supplied.
								    """

								    def __init__(self, message: str, index: Optional[int] = None) -> None:
								        super().__init__(message)
								        self.index = index


								def unidecode_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
								    """Transliterate a Unicode string into an ASCII string.

								    >>> unidecode("\u5317\u4EB0")
								    'Bei Jing '

								    This function first tries to convert the string using the ASCII codec.
								    If that fails (because of non-ASCII characters), it falls back to
								    transliteration using the character tables.

								    This is approx. five times faster if the string only contains ASCII
								    characters, but slightly slower than unidecode_expect_nonascii if
								    non-ASCII characters are present.

								    errors specifies what to do with characters that have not been
								    found in replacement tables. The default is 'ignore' which ignores
								    the character. 'strict' raises an UnidecodeError. 'replace'
								    substitutes the character with replace_str (default is '?').
								    'preserve' keeps the original character.

								    Note that if 'preserve' is used the returned string might not be
								    ASCII!
								    """
								    try:
								        # Only success or failure of the encode matters; the resulting
								        # bytes are discarded.
								        string.encode('ASCII')
								    except UnicodeEncodeError:
								        # Fall through to the table-based path below. The call is kept
								        # outside this handler so that errors it raises are not chained
								        # to the UnicodeEncodeError.
								        pass
								    else:
								        # Pure ASCII input is returned unchanged.
								        return string

								    return _unidecode(string, errors, replace_str)


								def unidecode_expect_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
								    """Transliterate a Unicode string into an ASCII string.

								    >>> unidecode_expect_nonascii("\u5317\u4EB0")
								    'Bei Jing '

								    The string is handed straight to the table-based transliteration,
								    without first trying the ASCII codec. The errors and replace_str
								    arguments are passed on unchanged; see unidecode_expect_ascii for
								    their meaning.
								    """
								    transliterated = _unidecode(string, errors, replace_str)
								    return transliterated


								# Default entry point: `unidecode` is an alias of unidecode_expect_ascii.
								unidecode = unidecode_expect_ascii


								def _get_repl_str(char: str) -> Optional[str]:

								    codepoint = ord(char)


								    if codepoint < 0x80:

								        # Already ASCII

								        return str(char)


								    if codepoint > 0xeffff:

								        # No data on characters in Private Use Area and above.

								        return None


								    if 0xd800 <= codepoint <= 0xdfff:

								        warnings.warn(  "Surrogate character %r will be ignored. "

								                        "You might be using a narrow Python build." % (char,),

								                        RuntimeWarning, 2)


								    section = codepoint >> 8   # Chop off the last two hex digits

								    position = codepoint % 256 # Last two hex digits


								    try:

								        table = Cache[section]

								    except KeyError:

								        try:

								            mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])

								        except ImportError:

								            # No data on this character

								            Cache[section] = None

								            return None


								        Cache[section] = table = mod.data


								    if table and len(table) > position:

								        return table[position]

								    else:

								        return None


								def _unidecode(string: str, errors: str, replace_str:str) -> str:

								    retval = []


								    for index, char in enumerate(string):

								        repl = _get_repl_str(char)


								        if repl is None:

								            if errors == 'ignore':

								                repl = ''

								            elif errors == 'strict':

								                raise UnidecodeError('no replacement found for character %r '

								                        'in position %d' % (char, index), index)

								            elif errors == 'replace':

								                repl = replace_str

								            elif errors == 'preserve':

								                repl = char

								            else:

								                raise UnidecodeError('invalid value for errors parameter %r' % (errors,))


								        retval.append(repl)


								    return ''.join(retval)