varia.website/venv/lib/python3.11/site-packages/pygments/util.py


								# -*- coding: utf-8 -*-

								"""

								    pygments.util

								    ~~~~~~~~~~~~~


								    Utility functions.


								    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.

								    :license: BSD, see LICENSE for details.

								"""


								import re

								import sys

								from io import TextIOWrapper


								# Splits a shebang command line on '/', '\' and space characters
								# (used by shebang_matches to isolate the last path/argument piece).
								split_path_re = re.compile(r'[/\\ ]')

								# Finds a DOCTYPE declaration, optionally preceded by a '<?...?>'
								# processing instruction (group 1).  Group 2 captures the doctype name
								# plus, when present, the following keyword and quoted identifier.
								doctype_lookup_re = re.compile(r'''

								    (<\?.*?\?>)?\s*

								    <!DOCTYPE\s+(

								     [a-zA-Z_][a-zA-Z0-9]*

								     (?: \s+      # optional in HTML5

								     [a-zA-Z_][a-zA-Z0-9]*\s+

								     "[^"]*")?

								     )

								     [^>]*>

								''', re.DOTALL | re.MULTILINE | re.VERBOSE)

								# Matches an opening tag (name in group 1, optional attribute text in
								# group 2) that is later followed by some closing '</...>' tag.
								tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',

								                    re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE)

								# Matches an '<?xml ... ?>' declaration after optional leading
								# whitespace, case-insensitively.
								xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)


								class ClassNotFound(ValueError):
								    """Signals that a lookup function could not find a matching class."""


								class OptionError(Exception):
								    """Raised by the option helpers when an option has a bad type or value."""


								def get_choice_opt(options, optname, allowed, default=None, normcase=False):
								    """Return option *optname* from *options* after checking it is allowed.

								    *options* only needs a ``get(name, default)`` method.  When the option
								    is absent, *default* is used.  With *normcase* true the value is
								    lower-cased before it is checked and returned.

								    Raises `OptionError` if the resulting value is not in *allowed*.
								    """
								    string = options.get(optname, default)
								    if normcase:
								        try:
								            string = string.lower()
								        except AttributeError:
								            # Values without ``lower()`` (e.g. a missing option whose
								            # default is None) are validated as they are, so a bad value
								            # yields OptionError below instead of an AttributeError here.
								            pass
								    if string not in allowed:
								        raise OptionError('Value for option %s must be one of %s' %
								                          (optname, ', '.join(map(str, allowed))))
								    return string


								def get_bool_opt(options, optname, default=None):
								    """Interpret option *optname* from *options* as a boolean.

								    Booleans and integers are converted with ``bool()``.  Strings are
								    compared case-insensitively against ``1/yes/true/on`` and
								    ``0/no/false/off``.  *default* is used when the option is absent.

								    Raises `OptionError` for any other type or for an unrecognised string.
								    """
								    value = options.get(optname, default)
								    # bool is a subclass of int, and bool() returns a bool unchanged, so
								    # one check covers both cases.
								    if isinstance(value, int):
								        return bool(value)
								    if not isinstance(value, str):
								        raise OptionError('Invalid type %r for option %s; use '
								                          '1/0, yes/no, true/false, on/off' % (
								                              value, optname))
								    lowered = value.lower()
								    if lowered in ('1', 'yes', 'true', 'on'):
								        return True
								    if lowered in ('0', 'no', 'false', 'off'):
								        return False
								    raise OptionError('Invalid value %r for option %s; use '
								                      '1/0, yes/no, true/false, on/off' % (
								                          value, optname))


								def get_int_opt(options, optname, default=None):
								    """Return option *optname* from *options* converted with ``int()``.

								    *default* is used when the option is absent.  Raises `OptionError` if
								    ``int()`` rejects the value with a TypeError or a ValueError.
								    """
								    value = options.get(optname, default)
								    try:
								        number = int(value)
								    except TypeError:
								        raise OptionError('Invalid type %r for option %s; you '
								                          'must give an integer value' % (
								                              value, optname))
								    except ValueError:
								        raise OptionError('Invalid value %r for option %s; you '
								                          'must give an integer value' % (
								                              value, optname))
								    else:
								        return number


								def get_list_opt(options, optname, default=None):
								    """Return option *optname* from *options* as a list.

								    A string is split on whitespace; a list or tuple is copied into a new
								    list.  *default* is used when the option is absent.

								    Raises `OptionError` for any other type.
								    """
								    value = options.get(optname, default)
								    if isinstance(value, (list, tuple)):
								        return list(value)
								    if isinstance(value, str):
								        return value.split()
								    raise OptionError('Invalid type %r for option %s; you '
								                      'must give a list value' % (
								                          value, optname))


								def docstring_headline(obj):
								    """Return the first paragraph of ``obj.__doc__`` as a single line.

								    Lines are stripped and joined with single spaces up to the first blank
								    line.  An empty string is returned when there is no docstring.
								    """
								    doc = obj.__doc__
								    if not doc:
								        return ''
								    pieces = []
								    for raw_line in doc.strip().splitlines():
								        stripped = raw_line.strip()
								        if not stripped:
								            # A blank line ends the headline paragraph.
								            break
								        pieces.append(stripped)
								    return ' '.join(pieces)


								def make_analysator(f):
								    """Wrap *f* as a static text analyser that always returns a float.

								    The wrapper returns ``f(text)`` converted to float and clamped to
								    the range 0.0..1.0.  It returns 0.0 when *f* raises, returns a falsy
								    value, or returns something ``float()`` cannot convert.  The wrapper
								    carries *f*'s docstring and is returned as a ``staticmethod``.
								    """
								    def text_analyse(text):
								        try:
								            score = f(text)
								        except Exception:
								            return 0.0
								        if score:
								            try:
								                return min(1.0, max(0.0, float(score)))
								            except (ValueError, TypeError):
								                pass
								        return 0.0
								    text_analyse.__doc__ = f.__doc__
								    return staticmethod(text_analyse)


								def shebang_matches(text, regex):
								    r"""Check if the given regular expression matches the last part of the
								    shebang if one exists.

								        >>> from pygments.util import shebang_matches
								        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
								        True
								        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
								        True
								        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
								        False
								        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
								        False
								        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
								        ...                 r'python(2\.\d)?')
								        True

								    It also checks for common windows executable file extensions::

								        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
								        True

								    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
								    the same as ``'perl -e'``)

								    Note that this method automatically searches the whole string (eg:
								    the regular expression is wrapped in ``'^$'``).  The expression is
								    grouped before it is anchored, so alternatives are anchored too::

								        >>> shebang_matches('#!/usr/bin/pythonista', r'python|perl')
								        False

								    Only the first line of *text* is examined, and it is lower-cased
								    before matching.  Returns False when there is no shebang line.
								    """
								    first_line = text.partition('\n')[0].lower()
								    if not first_line.startswith('#!'):
								        return False
								    # Split the command line into path pieces and arguments, dropping
								    # empty pieces and anything that looks like an option.
								    candidates = [x for x in split_path_re.split(first_line[2:].strip())
								                  if x and not x.startswith('-')]
								    if not candidates:
								        return False
								    # The non-capturing group keeps a top-level ``a|b`` in *regex* from
								    # escaping the ``^`` / ``$`` anchors.
								    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
								                         re.IGNORECASE)
								    return pattern.search(candidates[-1]) is not None


								def doctype_matches(text, regex):
								    """Tell whether the DOCTYPE found in *text* matches *regex*.

								    Returns False when *text* contains no DOCTYPE declaration.  The
								    match is case-insensitive and anchored at the start of the doctype.

								    Note that only the first part of a DOCTYPE is checked,
								    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
								    """
								    found = doctype_lookup_re.search(text)
								    if found is None:
								        return False
								    declared = found.group(2).strip()
								    return re.match(regex, declared, re.I) is not None


								def html_doctype_matches(text):
								    """Tell whether *text* has a DOCTYPE that starts with ``html``."""
								    html_pattern = r'html'
								    return doctype_matches(text, html_pattern)


								# Tag-search results from looks_like_xml, keyed by hash(text).
								_looks_like_xml_cache = {}


								def looks_like_xml(text):
								    """Tell whether *text* has an XML declaration, a DOCTYPE, or tags.

								    Only the first 1000 characters are searched for tags, and only that
								    tag-search result is stored in the cache.
								    """
								    if xml_decl_re.match(text):
								        return True
								    cache_key = hash(text)
								    if cache_key in _looks_like_xml_cache:
								        return _looks_like_xml_cache[cache_key]
								    if doctype_lookup_re.search(text) is not None:
								        return True
								    has_tags = tag_re.search(text[:1000]) is not None
								    _looks_like_xml_cache[cache_key] = has_tags
								    return has_tags


								# Python narrow build compatibility


								def _surrogatepair(c):

								    # Given a unicode character code

								    # with length greater than 16 bits,

								    # return the two 16 bit surrogate pair.

								    # From example D28 of:

								    # http://www.unicode.org/book/ch03.pdf

								    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))


								def unirange(a, b):
								    """Returns a regular expression string to match the given non-BMP range.

								    *a* and *b* are the inclusive integer code points of the range.

								    Raises ValueError if ``b < a`` or if either bound is below 0x10000.
								    """
								    if b < a:
								        raise ValueError("Bad character range")
								    if a < 0x10000 or b < 0x10000:
								        raise ValueError("unirange is only defined for non-BMP ranges")

								    if sys.maxunicode > 0xffff:
								        # wide build
								        return '[%s-%s]' % (chr(a), chr(b))

								    # narrow build stores surrogates, and the 're' module handles them
								    # (incorrectly) as characters.  Since there is still ordering among
								    # these characters, expand the range to one that it understands.  Some
								    # background in http://bugs.python.org/issue3665 and
								    # http://bugs.python.org/issue12749
								    #
								    # Additionally, the lower constants are using chr rather than
								    # literals because jython [which uses the wide path] can't load this
								    # file if they are literals.
								    ah, al = _surrogatepair(a)
								    bh, bl = _surrogatepair(b)
								    if ah == bh:
								        return '(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))

								    # From here on bh > ah.  First alternative: the rest of the first
								    # high surrogate's block.
								    buf = ['%s[%s-%s]' % (chr(ah), chr(al), chr(0xdfff))]
								    if bh - ah > 1:
								        # High surrogates strictly between the two ends take any low
								        # surrogate.
								        buf.append('[%s-%s][%s-%s]' %
								                   (chr(ah + 1), chr(bh - 1), chr(0xdc00), chr(0xdfff)))
								    # Last alternative: the start of the last high surrogate's block.
								    buf.append('%s[%s-%s]' % (chr(bh), chr(0xdc00), chr(bl)))
								    return '(?:' + '|'.join(buf) + ')'


								def format_lines(var_name, seq, raw=False, indent_level=0):
								    """Render *seq* as source text for a tuple assigned to *var_name*.

								    Each item goes on its own line followed by a comma, indented one
								    level (four spaces) deeper than the assignment, which is itself
								    indented by *indent_level* levels.  With *raw* true the items are
								    inserted verbatim; otherwise each item is written as a single-quoted
								    string literal.
								    """
								    outer = ' ' * indent_level * 4
								    inner = ' ' * (indent_level + 1) * 4
								    header = outer + var_name + ' = ('
								    if raw:
								        # Items are expected to be preformatted reprs of, say, tuples.
								        body = [inner + item + ',' for item in seq]
								    else:
								        body = []
								        for item in seq:
								            # Appending a double quote makes repr() pick single quotes;
								            # the slice then removes that extra character again.
								            quoted = repr(item + '"')
								            body.append(inner + quoted[:-2] + quoted[-1] + ',')
								    return '\n'.join([header, *body, outer + ')'])


								def duplicates_removed(it, already_seen=()):
								    """
								    Returns the items of the iterable `it` as a list without repeats.

								    Items contained in `already_seen` are left out as well.
								    Order is preserved.
								    """
								    unique = []
								    emitted = set()
								    for item in it:
								        if item not in emitted and item not in already_seen:
								            unique.append(item)
								            emitted.add(item)
								    return unique


								class Future:
								    """Base class for objects that defer some work.

								    Handled specially in RegexLexerMeta, to support regex string
								    construction at first use.
								    """

								    def get(self):
								        """Not implemented in this base class; always raises."""
								        raise NotImplementedError


								def guess_decode(text):
								    """Decode *text* with guessed encoding.

								    First try UTF-8; this should fail for non-UTF-8 encodings.
								    Then try the preferred locale encoding.
								    Fall back to latin-1, which always works.

								    *text* must offer a bytes-style ``decode()`` method.  Returns a
								    ``(decoded_text, encoding_name)`` tuple.
								    """
								    try:
								        return text.decode('utf-8'), 'utf-8'
								    except UnicodeDecodeError:
								        pass
								    try:
								        import locale
								        prefencoding = locale.getpreferredencoding()
								        # Pass the encoding explicitly: a bare decode() would use UTF-8
								        # again, which has just failed.
								        return text.decode(prefencoding), prefencoding
								    except (UnicodeDecodeError, LookupError):
								        return text.decode('latin1'), 'latin1'


								def guess_decode_from_terminal(text, term):
								    """Decode *text* coming from terminal *term*.

								    First try the terminal encoding, if given.
								    Then try UTF-8.  Then try the preferred locale encoding.
								    Fall back to latin-1, which always works.

								    Returns a ``(decoded_text, encoding_name)`` tuple.
								    """
								    encoding = getattr(term, 'encoding', None)
								    if encoding:
								        try:
								            return text.decode(encoding), encoding
								        except (UnicodeDecodeError, LookupError):
								            # LookupError: the terminal reports an encoding name Python
								            # does not know.  Keep guessing instead of crashing.
								            pass
								    return guess_decode(text)


								def terminal_encoding(term):
								    """Return our best guess of encoding for the given *term*.

								    A truthy ``term.encoding`` is used when present; otherwise the
								    preferred locale encoding is returned.
								    """
								    declared = getattr(term, 'encoding', None)
								    if declared:
								        return declared
								    import locale
								    return locale.getpreferredencoding()


								class UnclosingTextIOWrapper(TextIOWrapper):
								    """TextIOWrapper whose ``close()`` only flushes.

								    This keeps the underlying buffer from being closed on destruction.
								    """

								    def close(self):
								        """Flush pending output; neither wrapper nor buffer is closed."""
								        self.flush()