# $Id: docutils_xml.py 8368 2019-08-27 12:10:14Z milde $ # Author: David Goodger, Paul Tremblay, Guenter Milde # Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. """ Simple document tree Writer, writes Docutils XML according to http://docutils.sourceforge.net/docs/ref/docutils.dtd. """ __docformat__ = 'reStructuredText' import sys import xml.sax.saxutils import docutils from docutils import frontend, writers, nodes if sys.version_info >= (3, 0): from io import StringIO # noqa else: from StringIO import StringIO # noqa if sys.version_info >= (3, 0): unicode = str # noqa class RawXmlError(docutils.ApplicationError): pass class Writer(writers.Writer): supported = ('xml',) """Formats this writer supports.""" settings_spec = ( '"Docutils XML" Writer Options', None, (('Generate XML with newlines before and after tags.', ['--newlines'], {'action': 'store_true', 'validator': frontend.validate_boolean}), ('Generate XML with indents and newlines.', ['--indents'], #@ TODO use integer value for number of spaces? {'action': 'store_true', 'validator': frontend.validate_boolean}), ('Omit the XML declaration. Use with caution.', ['--no-xml-declaration'], {'dest': 'xml_declaration', 'default': 1, 'action': 'store_false', 'validator': frontend.validate_boolean}), ('Omit the DOCTYPE declaration.', ['--no-doctype'], {'dest': 'doctype_declaration', 'default': 1, 'action': 'store_false', 'validator': frontend.validate_boolean}),)) settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'} config_section = 'docutils_xml writer' config_section_dependencies = ('writers',) output = None """Final translated form of `document`.""" def __init__(self): writers.Writer.__init__(self) self.translator_class = XMLTranslator def translate(self): self.visitor = visitor = self.translator_class(self.document) self.document.walkabout(visitor) self.output = ''.join(visitor.output) class XMLTranslator(nodes.GenericNodeVisitor): xml_declaration = '<?xml version="1.0" encoding="%s"?>\n' # TODO: add stylesheet options similar to HTML and LaTeX writers? #xml_stylesheet = '<?xml-stylesheet type="text/xsl" href="%s"?>\n' doctype = ( '<!DOCTYPE document PUBLIC' ' "+//IDN docutils.sourceforge.net//DTD Docutils Generic//EN//XML"' ' "http://docutils.sourceforge.net/docs/ref/docutils.dtd">\n') generator = '<!-- Generated by Docutils %s -->\n' xmlparser = xml.sax.make_parser() """SAX parser instance to check/exctract raw XML.""" xmlparser.setFeature( "http://xml.org/sax/features/external-general-entities", True) def __init__(self, document): nodes.NodeVisitor.__init__(self, document) # Reporter self.warn = self.document.reporter.warning self.error = self.document.reporter.error # Settings self.settings = settings = document.settings self.indent = self.newline = '' if settings.newlines: self.newline = '\n' if settings.indents: self.newline = '\n' self.indent = ' ' #@ TODO make this configurable? self.level = 0 # indentation level self.in_simple = 0 # level of nesting inside mixed-content elements self.fixed_text = 0 # level of nesting inside FixedText elements # Output self.output = [] if settings.xml_declaration: self.output.append( self.xml_declaration % settings.output_encoding) if settings.doctype_declaration: self.output.append(self.doctype) self.output.append(self.generator % docutils.__version__) # initialize XML parser self.the_handle=TestXml() self.xmlparser.setContentHandler(self.the_handle) # generic visit and depart methods # -------------------------------- simple_nodes = (nodes.TextElement, nodes.image, nodes.colspec, nodes.transition) # empty elements def default_visit(self, node): """Default node visit method.""" if not self.in_simple: self.output.append(self.indent*self.level) self.output.append(node.starttag(xml.sax.saxutils.quoteattr)) self.level += 1 # @@ make nodes.literal an instance of FixedTextElement? if isinstance(node, (nodes.FixedTextElement, nodes.literal)): self.fixed_text += 1 if isinstance(node, self.simple_nodes): self.in_simple += 1 if not self.in_simple: self.output.append(self.newline) def default_departure(self, node): """Default node depart method.""" self.level -= 1 if not self.in_simple: self.output.append(self.indent*self.level) self.output.append(node.endtag()) if isinstance(node, (nodes.FixedTextElement, nodes.literal)): self.fixed_text -= 1 if isinstance(node, self.simple_nodes): self.in_simple -= 1 if not self.in_simple: self.output.append(self.newline) # specific visit and depart methods # --------------------------------- def visit_Text(self, node): text = xml.sax.saxutils.escape(node.astext()) # indent text if we are not in a FixedText element: if not self.fixed_text: text = text.replace('\n', '\n'+self.indent*self.level) self.output.append(text) def depart_Text(self, node): pass def visit_raw(self, node): if 'xml' not in node.get('format', '').split(): # skip other raw content? # raise nodes.SkipNode self.default_visit(node) return # wrap in <raw> element self.default_visit(node) # or not? xml_string = node.astext() self.output.append(xml_string) self.default_departure(node) # or not? # Check validity of raw XML: if isinstance(xml_string, unicode) and sys.version_info < (3, 0): xml_string = xml_string.encode('utf8') try: self.xmlparser.parse(StringIO(xml_string)) except xml.sax._exceptions.SAXParseException as error: col_num = self.the_handle.locator.getColumnNumber() line_num = self.the_handle.locator.getLineNumber() srcline = node.line if not isinstance(node.parent, nodes.TextElement): srcline += 2 # directive content start line msg = 'Invalid raw XML in column %d, line offset %d:\n%s' % ( col_num, line_num, node.astext()) self.warn(msg, source=node.source, line=srcline+line_num-1) raise nodes.SkipNode # content already processed class TestXml(xml.sax.handler.ContentHandler): def setDocumentLocator(self, locator): self.locator = locator