Viewing file: XmlPrinter.py (10.78 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Xml/Lib/XmlPrinter.py,v 1.13 2005/02/09 09:11:13 mbrown Exp $
"""
This module supports document serialization in XML syntax.

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""
import cStreamWriter
class XmlPrinter:
    """
    An XmlPrinter instance provides functions for serializing an XML or
    XML-like document to a stream, based on SAX-like event calls
    initiated by an Ft.Xml.Lib.Print.PrintVisitor instance.

    The methods in this base class attempt to emit a well-formed parsed
    general entity conformant to XML 1.0 syntax, with no extra
    whitespace added for visual formatting. Subclasses may emit
    documents conformant to other syntax specifications or with
    additional whitespace for indenting.

    The degree of well-formedness of the output depends on the data
    supplied in the event calls; no checks are done for conditions that
    would result in syntax errors, such as two attributes with the same
    name, "--" in a comment, etc. In particular, attribute() writes its
    output unconditionally; it does not check that the previous event
    was startElement(), so callers must invoke it only while a start
    tag is open.
    """

    def __init__(self, stream, encoding):
        """
        Creates an XmlPrinter instance.

        stream must be a file-like object open for writing binary data.
        encoding specifies the encoding which is to be used for writing
        to the stream.
        """
        self.stream = sw = cStreamWriter.StreamWriter(stream, encoding)
        self.encoding = encoding
        # Bind the writer's methods directly on the instance so that the
        # event handlers can call them without going through self.stream.
        self.writeAscii = sw.writeAscii
        self.writeEncode = sw.writeEncode
        self.writeEscape = sw.writeEscape
        # True while a start tag has been begun but its closing '>' (or
        # '/>') has not yet been written.
        self._inElement = False

    def _closeStartTag(self):
        """
        Writes the '>' that terminates a pending start tag, if any.

        Called by every event that produces element content, so that
        the start tag begun by startElement() is finished before the
        content is written.
        """
        if self._inElement:
            self.writeAscii('>')
            self._inElement = False

    def startDocument(self, version='1.0', standalone=None):
        """
        Handles a startDocument event.

        Writes XML declaration or text declaration to the stream. The
        standalone pseudo-attribute is written only if standalone is
        not None.
        """
        self.writeAscii('<?xml version="%s" encoding="%s"'
                        % (version, self.encoding))
        if standalone is not None:
            self.writeAscii(' standalone="%s"' % standalone)
        self.writeAscii('?>\n')

    def endDocument(self):
        """
        Handles an endDocument event.

        Writes any necessary final output to the stream.
        """
        if self._inElement:
            # No element content, use minimized form
            self.writeAscii('/>')
            # Reset the flag (as endElement() does) so that any later
            # event does not emit a spurious '>'.
            self._inElement = False

    def doctype(self, name, publicId, systemId):
        """
        Handles a doctype event.

        Writes a document type declaration to the stream. Nothing is
        written for the declaration itself unless systemId is non-empty;
        a publicId without a systemId is ignored.
        """
        self._closeStartTag()
        if systemId:
            self.writeAscii('<!DOCTYPE ')
            self.writeEncode(name, 'document type name')
            if publicId:
                self.writeAscii(' PUBLIC "')
                self.writeEncode(publicId, 'document type public-id')
                self.writeAscii('" "')
            else:
                self.writeAscii(' SYSTEM "')
            self.writeEncode(systemId, 'document type system-id')
            self.writeAscii('">\n')

    def startElement(self, namespaceUri, tagName, namespaces, attributes):
        """
        Handles a startElement event.

        Writes part of an element's start-tag or empty-element tag to
        the stream, and closes the start tag of the previous element,
        if one remained open. Writes the xmlns attributes for the given
        dictionary of namespaces, and invokes attribute() as needed to
        write the given dictionary of attributes.

        The start tag is deliberately left open (no '>' is written) so
        that endElement() can emit the minimized '/>' form when the
        element turns out to have no content.

        The namespaceUri argument is ignored in this class.
        """
        if self._inElement:
            # Close current start tag
            self.writeAscii('>')
        else:
            self._inElement = True

        self.writeAscii('<')
        self.writeEncode(tagName, 'start-tag name')

        # Write the namespaces
        for prefix, uri in namespaces.items():
            if prefix:
                nsAttrName = u"xmlns:" + prefix
            else:
                # An empty/None prefix denotes the default namespace
                nsAttrName = u"xmlns"
            self.attribute(namespaceUri, tagName, nsAttrName, uri)

        # Now the attributes
        for name, value in attributes.items():
            self.attribute(namespaceUri, tagName, name, value)

    def endElement(self, namespaceUri, tagName):
        """
        Handles an endElement event.

        Writes the closing tag for an element to the stream, or, if the
        element had no content, finishes writing the empty element tag.

        The namespaceUri argument is ignored in this class.
        """
        if self._inElement:
            # No element content, use minimized form
            self.writeAscii('/>')
            self._inElement = False
        else:
            self.writeAscii('</')
            self.writeEncode(tagName, 'end-tag name')
            self.writeAscii('>')

    # elementUri and elementName are only needed for HTML output
    def attribute(self, elementUri, elementName, name, value):
        """
        Handles an attribute event.

        Writes an attribute to the stream as a space followed by
        the name, '=', and quote-delimited value. It is the caller's
        responsibility to ensure that this is called in the correct
        context, if well-formed output is desired.

        Preference is given to quotes (") around attribute values, in
        accordance with the DomWriter interface in DOM Level 3 Load and
        Save (25 July 2002 WD), although a value that contains quotes
        but no apostrophes will be delimited by apostrophes (') instead.
        The elementUri and elementName arguments are not used by
        default, but may be used by subclasses.
        """
        self.writeAscii(" ")
        self.writeEncode(name, 'attribute name')
        self.writeAscii("=")

        # Replace characters illegal in attribute values
        # Wrap the value with appropriate quoting in accordance with
        # DOM Level 3 Load and Save:
        # 1. Attributes not containing quotes are serialized in quotes.
        # 2. Attributes containing quotes but no apostrophes are
        #    serialized in apostrophes.
        # 3. Attributes containing both forms of quotes are serialized
        #    in quotes, with quotes within the value represented by the
        #    predefined entity &quot;.
        if (u'"' in value) and (u"'" not in value):
            # Use apostrophes (#2)
            entitymap = self.attrEntitiesApos
            quote = "'"
        else:
            # Use quotes (#1 and #3)
            entitymap = self.attrEntitiesQuot
            quote = '"'

        self.writeAscii(quote)
        self.writeEscape(value, entitymap)
        self.writeAscii(quote)

    def text(self, text, disableEscaping=0):
        """
        Handles a text event.

        Writes character data to the stream. If the disableEscaping
        flag is not set, the text is written with writeEscape() using
        the textEntities map: "&", "<" and ">" are escaped as "&amp;",
        "&lt;" and "&gt;", and a carriage return is written as "&#13;".
        Note that ">" is always escaped, although XML only requires it
        when it is preceded by "]]". If the disableEscaping flag is
        set, the characters are written with writeEncode() and no
        escaping of any kind, which may raise an exception if there are
        unencodable characters.
        """
        self._closeStartTag()

        if disableEscaping:
            # Try to write the raw encoded string (may throw exception)
            self.writeEncode(text, 'text')
        else:
            # FIXME: only escape ">" if after "]]"
            # (may not be worth the trouble)
            self.writeEscape(text, self.textEntities)

    def cdataSection(self, data):
        """
        Handles a cdataSection event.

        Writes character data to the stream as a CDATA section. Since
        "]]>" cannot appear inside a CDATA section, each occurrence in
        data is split across two adjacent sections: the first ends with
        "]]" and the next begins with ">".
        """
        self._closeStartTag()

        sections = data.split(u']]>')
        self.writeAscii('<![CDATA[')
        self.writeEncode(sections[0], 'CDATA section')
        for section in sections[1:]:
            # "]]" + end of section + start of new section + ">"
            self.writeAscii(']]]]><![CDATA[>')
            self.writeEncode(section, 'CDATA section')
        self.writeAscii(']]>')

    def processingInstruction(self, target, data):
        """
        Handles a processingInstruction event.

        Writes a processing instruction to the stream. The separating
        space and the data are omitted when data is empty.
        """
        self._closeStartTag()

        self.writeAscii('<?')
        self.writeEncode(target, 'processing instruction target')
        if data:
            self.writeAscii(' ')
            self.writeEncode(data, 'processing instruction data')
        self.writeAscii('?>')

    def comment(self, data):
        """
        Handles a comment event.

        Writes a comment to the stream. The data is written as-is; no
        check is made for "--" within it.
        """
        self._closeStartTag()

        self.writeAscii("<!--")
        self.writeEncode(data, 'comment')
        self.writeAscii("-->")

    # Entities as defined by Canonical XML 1.0 (http://www.w3.org/TR/xml-c14n)
    # For XML 1.1, add u'\u0085': '&#133;' and u'\u2028': '&#8232;' to all
    textEntities = cStreamWriter.EntityMap({'<' : '&lt;',
                                            '>' : '&gt;',
                                            '&' : '&amp;',
                                            '\r' : '&#13;',
                                            })

    # Used when the attribute value is delimited by quotes (")
    attrEntitiesQuot = cStreamWriter.EntityMap({'<' : '&lt;',
                                                '&' : '&amp;',
                                                '\t' : '&#9;',
                                                '\n' : '&#10;',
                                                '\r' : '&#13;',
                                                '"' : '&quot;',
                                                })

    # Used when the attribute value is delimited by apostrophes (')
    attrEntitiesApos = cStreamWriter.EntityMap({'<' : '&lt;',
                                                '&' : '&amp;',
                                                '\t' : '&#9;',
                                                '\n' : '&#10;',
                                                '\r' : '&#13;',
                                                "'" : '&apos;',
                                                })
|