varia.website/venv/lib/python3.11/site-packages/html5lib/treewalkers/base.py


								from __future__ import absolute_import, division, unicode_literals


								from xml.dom import Node

								from ..constants import namespaces, voidElements, spaceCharacters


								__all__ = [
								    "DOCUMENT",
								    "DOCTYPE",
								    "TEXT",
								    "ELEMENT",
								    "COMMENT",
								    "ENTITY",
								    "UNKNOWN",
								    "TreeWalker",
								    "NonRecursiveTreeWalker",
								]

								# Node-type tags used to classify nodes. Every tag except UNKNOWN reuses
								# the corresponding node-type constant of xml.dom.Node.
								DOCUMENT = Node.DOCUMENT_NODE
								DOCTYPE = Node.DOCUMENT_TYPE_NODE
								TEXT = Node.TEXT_NODE
								ELEMENT = Node.ELEMENT_NODE
								COMMENT = Node.COMMENT_NODE
								ENTITY = Node.ENTITY_NODE

								# Tag for any node that matches none of the types above.
								UNKNOWN = "<#UNKNOWN#>"

								# Join the imported space characters into a single string so that it can
								# be handed directly to str.lstrip() / str.rstrip().
								spaceCharacters = "".join(spaceCharacters)


								class TreeWalker(object):
								    """Walks a tree yielding tokens

								    Tokens are dicts that all have a ``type`` field specifying the type of the
								    token.

								    This base class only provides the token constructors; ``__iter__`` raises
								    ``NotImplementedError`` here and has to be supplied by a subclass.

								    """

								    def __init__(self, tree):
								        """Creates a TreeWalker

								        :arg tree: the tree to walk

								        """
								        self.tree = tree

								    def __iter__(self):
								        """Iterates over the tokens of the tree

								        :raises NotImplementedError: always; subclasses must override this

								        """
								        raise NotImplementedError

								    def error(self, msg):
								        """Generates an error token with the given message

								        :arg msg: the error message

								        :returns: SerializeError token

								        """
								        return {"type": "SerializeError", "data": msg}

								    def emptyTag(self, namespace, name, attrs, hasChildren=False):
								        """Generates an EmptyTag token, optionally followed by an error token

								        This is a generator, so the result has to be iterated.

								        :arg namespace: the namespace of the token--can be ``None``

								        :arg name: the name of the element

								        :arg attrs: the attributes of the element as a dict

								        :arg hasChildren: whether or not to also yield a SerializeError
								            token because this tag shouldn't have children

								        :returns: generator yielding the EmptyTag token and, when
								            ``hasChildren`` is true, a SerializeError token after it

								        """
								        yield {"type": "EmptyTag", "name": name,
								               "namespace": namespace,
								               "data": attrs}
								        if hasChildren:
								            yield self.error("Void element has children")

								    def startTag(self, namespace, name, attrs):
								        """Generates a StartTag token

								        :arg namespace: the namespace of the token--can be ``None``

								        :arg name: the name of the element

								        :arg attrs: the attributes of the element as a dict

								        :returns: StartTag token

								        """
								        return {"type": "StartTag",
								                "name": name,
								                "namespace": namespace,
								                "data": attrs}

								    def endTag(self, namespace, name):
								        """Generates an EndTag token

								        :arg namespace: the namespace of the token--can be ``None``

								        :arg name: the name of the element

								        :returns: EndTag token

								        """
								        return {"type": "EndTag",
								                "name": name,
								                "namespace": namespace}

								    def text(self, data):
								        """Generates SpaceCharacters and Characters tokens

								        Depending on what's in the data, this generates zero or more
								        ``SpaceCharacters`` and ``Characters`` tokens: leading space
								        characters, then the remaining text, then trailing space characters.
								        Empty parts are skipped.

								        For example:

								            >>> from html5lib.treewalkers.base import TreeWalker
								            >>> # Give it an empty tree just so it instantiates
								            >>> walker = TreeWalker([])
								            >>> list(walker.text(''))
								            []
								            >>> list(walker.text('  ')) == [
								            ...     {'type': 'SpaceCharacters', 'data': '  '}]
								            True
								            >>> list(walker.text(' abc ')) == [
								            ...     {'type': 'SpaceCharacters', 'data': ' '},
								            ...     {'type': 'Characters', 'data': 'abc'},
								            ...     {'type': 'SpaceCharacters', 'data': ' '}]
								            True

								        :arg data: the text data

								        :returns: generator of ``SpaceCharacters`` and ``Characters`` tokens

								        """
								        # Whatever lstrip() removed is the leading run of space characters.
								        withoutLeft = data.lstrip(spaceCharacters)
								        left = data[:len(data) - len(withoutLeft)]
								        if left:
								            yield {"type": "SpaceCharacters", "data": left}

								        # Whatever rstrip() removed from the rest is the trailing run.
								        middle = withoutLeft.rstrip(spaceCharacters)
								        right = withoutLeft[len(middle):]
								        if middle:
								            yield {"type": "Characters", "data": middle}
								        if right:
								            yield {"type": "SpaceCharacters", "data": right}

								    def comment(self, data):
								        """Generates a Comment token

								        :arg data: the comment

								        :returns: Comment token

								        """
								        return {"type": "Comment", "data": data}

								    def doctype(self, name, publicId=None, systemId=None):
								        """Generates a Doctype token

								        :arg name: stored under the ``name`` key of the token

								        :arg publicId: stored under the ``publicId`` key of the token

								        :arg systemId: stored under the ``systemId`` key of the token

								        :returns: the Doctype token

								        """
								        return {"type": "Doctype",
								                "name": name,
								                "publicId": publicId,
								                "systemId": systemId}

								    def entity(self, name):
								        """Generates an Entity token

								        :arg name: the entity name

								        :returns: an Entity token

								        """
								        return {"type": "Entity", "name": name}

								    def unknown(self, nodeType):
								        """Handles unknown node types

								        :arg nodeType: the unrecognised node type; any object is accepted
								            (for instance an integer node-type code), not only strings

								        :returns: SerializeError token naming the node type

								        """
								        # Format with %s instead of ``+`` so that a non-string node type
								        # produces an error token rather than raising TypeError.
								        return self.error("Unknown node type: %s" % (nodeType,))


								class NonRecursiveTreeWalker(TreeWalker):
								    """Tree walker that traverses the tree with a loop instead of recursion

								    Subclasses supply the four navigation hooks below; ``__iter__`` uses them
								    to visit the nodes depth-first and turn each one into tokens.

								    """

								    def getNodeDetails(self, node):
								        """Describes a node (must be overridden)

								        :arg node: the node to describe

								        :returns: a sequence whose first item is the node type and whose
								            remaining items depend on that type, as consumed by ``__iter__``:
								            the arguments for ``doctype()`` for DOCTYPE, the argument for
								            ``text()`` for TEXT, ``(namespace, name, attributes,
								            hasChildren)`` for ELEMENT, the data for COMMENT, the name for
								            ENTITY, nothing that is read for DOCUMENT, and the value passed to
								            ``unknown()`` for anything else

								        """
								        raise NotImplementedError

								    def getFirstChild(self, node):
								        """Returns the first child of a node (must be overridden)

								        :arg node: the parent node

								        :returns: the first child, or ``None`` when there is none

								        """
								        raise NotImplementedError

								    def getNextSibling(self, node):
								        """Returns the next sibling of a node (must be overridden)

								        :arg node: the node whose sibling is wanted

								        :returns: the next sibling, or ``None`` when there is none

								        """
								        raise NotImplementedError

								    def getParentNode(self, node):
								        """Returns the parent of a node (must be overridden)

								        :arg node: the node whose parent is wanted

								        :returns: the parent node; ``None`` ends the walk

								        """
								        raise NotImplementedError

								    def __iter__(self):
								        """Yields the tokens for ``self.tree`` and everything below it"""
								        node = self.tree
								        while node is not None:
								            info = self.getNodeDetails(node)
								            kind = info[0]
								            rest = info[1:]
								            descend = False

								            # Opening phase: emit the token(s) for entering this node.
								            if kind == DOCTYPE:
								                yield self.doctype(*rest)

								            elif kind == TEXT:
								                for token in self.text(*rest):
								                    yield token

								            elif kind == ELEMENT:
								                namespace, name, attributes, descend = rest
								                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
								                    for token in self.emptyTag(namespace, name, attributes,
								                                               descend):
								                        yield token
								                    # Children of a void element are reported, never walked.
								                    descend = False
								                else:
								                    yield self.startTag(namespace, name, attributes)

								            elif kind == COMMENT:
								                yield self.comment(rest[0])

								            elif kind == ENTITY:
								                yield self.entity(rest[0])

								            elif kind == DOCUMENT:
								                descend = True

								            else:
								                yield self.unknown(rest[0])

								            child = self.getFirstChild(node) if descend else None
								            if child is not None:
								                node = child
								                continue

								            # Closing phase: nothing to descend into, so climb back up,
								            # closing elements on the way, until a sibling is found or the
								            # root of the walk has been closed.
								            while node is not None:
								                info = self.getNodeDetails(node)
								                kind = info[0]
								                rest = info[1:]
								                if kind == ELEMENT:
								                    namespace, name, _attributes, _children = rest
								                    if (namespace and namespace != namespaces["html"]) or name not in voidElements:
								                        yield self.endTag(namespace, name)
								                if self.tree is node:
								                    node = None
								                    break
								                sibling = self.getNextSibling(node)
								                if sibling is not None:
								                    node = sibling
								                    break
								                node = self.getParentNode(node)