Viewing file: Processor.py (39.39 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
######################################################################## # $Header: /var/local/cvsroot/4Suite/Ft/Xml/Xslt/Processor.py,v 1.74 2005/03/06 20:53:58 uogbuji Exp $ """ XSLT processing engine
Copyright 2005 Fourthought, Inc. (USA). Detailed license and copyright information: http://4suite.org/COPYRIGHT Project home, documentation, distributions: http://4suite.org/ """
import os, sys, operator, cStringIO, warnings from xml.dom import Node
# For builtin extension elements/functions import Exslt, BuiltInExtElements
from Ft import DEFAULT_ENCODING from Ft.Lib import Uri from Ft.Xml import InputSource, Domlette, EMPTY_NAMESPACE from Ft.Xml.Lib import StripElements from Ft.Xml.Xslt import XsltContext, Error, XsltException, MessageSource from Ft.Xml.Xslt import OutputHandler, RtfWriter, StringWriter from Ft.Xml.Xslt.StylesheetReader import StylesheetReader
# Media types that signal that an xml-stylesheet PI points to an XSLT # document, when the PI contains a type pseudo-attribute. # # Note: RFC 3023 suggests application/xslt+xml, and says the +xml # suffix is not required (but is a SHOULD). If you want to use the # 'text/xsl' convention, do Processor.XSLT_IMT.append('text/xsl') # after import, but before instantiating Processor.Processor. # XSLT_IMT = ['application/xslt+xml', 'application/xslt', 'text/xml', 'application/xml']
class Processor(object): """ An XSLT processing engine (4XSLT).
Typical usage:
        from Ft.Lib.Uri import OsPathToUri
        from Ft.Xml import InputSource
        from Ft.Xml.Xslt import Processor
        # this is just one of several ways to create InputSources
        styuri = OsPathToUri('/absolute/path/to/stylesheet.xslt')
        srcuri = OsPathToUri('/absolute/path/to/doc.xml')
        STY = InputSource.DefaultFactory.fromUri(styuri)
        SRC = InputSource.DefaultFactory.fromUri(srcuri)
        proc = Processor.Processor()
        proc.appendStylesheet(STY)
        result = proc.run(SRC)
Optional constructor arguments are:
stylesheetAltUris: a list of alternative base URIs to use when resolving relative hrefs in xsl:import/xsl:include instructions. These URIs are only tried when the standard XSLT behavior of using the base URI of the xsl:import/include element itself fails to result in retrieval of a document.
documentReader: an object that will be used to parse XML source documents (not stylesheets). It defaults to Ft.Xml.Domlette.NonvalidatingReader, but it can be any object that has a parse() method that returns a DOM or Domlette tree.
implementation: a DOM implementation instance that will be used by the processor to create new source tree nodes, such as when generating result tree fragments or duplicating the source tree when runNode(node, preserveSrc=1) is called. Defaults to Ft.Xml.Domlette.implementation. Needs to have a createRootNode() method.
See the run() and runNode() methods for additional runtime options.
Important instance attributes:
.extensionParams: a dictionary that allows one to attach additional metadata to a processor instance. We use this to make invocation-specific data like HTTP query args and logfile handles available to XSLT extension functions & elements when invoking the processor via the repository's HTTP server.
.inputSourceFactory: InputSource factory instance used when obtaining source documents. Defaults to Ft.Xml.InputSource.DefaultFactory.
.mediaPref: the preferred/target media, for the purpose of picking from multiple xml-stylesheet processing instructions. Defaults to None. If set to a string, xml-stylesheet PIs without that string in their 'media' pseudo-attribute will be ignored.
.msgPrefix and .msgSuffix: strings emitted before and after xsl:message output.
.stylesheet: the complete stylesheet tree.
""" # defaults for ExtendedProcessingElements.ExtendedProcessor _4xslt_debug = False _4xslt_profile = False _4xslt_trace = False
def __init__(self, stylesheetAltUris=None, documentReader=None,
             implementation=None, stylesheetIncPaths=None):
    """
    Sets up a processor that has no stylesheet loaded yet.

    stylesheetAltUris - optional list of alternative base URIs; it is
        handed to the stylesheet reader when that reader is first
        created by getStylesheetReader().
    documentReader - optional reader for source documents. Defaults to
        Domlette.NonvalidatingReader.
    implementation - optional DOM implementation. Defaults to
        Domlette.implementation.
    stylesheetIncPaths - deprecated. Its entries are appended to the
        alternative URIs and a warning is emitted.
    """
    # Must be set first: self.warning() below reads it.
    self._suppressMsgs = 0
    self.msgPrefix = MessageSource.DEFAULT_MESSAGE_PREFIX
    self.msgSuffix = MessageSource.DEFAULT_MESSAGE_SUFFIX
    self.stylesheet = None
    self.inputSourceFactory = InputSource.DefaultFactory

    # Keep a private copy so that the extend() below (and anything else
    # done with this list later) never mutates the caller's argument.
    self._stylesheetAltUris = list(stylesheetAltUris or [])

    # FIXME: remove all trace of stylesheetIncPaths for 1.0 final
    if stylesheetIncPaths:
        self.warning("Deprecated 'stylesheetIncPaths' argument " \
                     "was used. Use 'stylesheetAltUris' instead.")
        self._stylesheetAltUris.extend(stylesheetIncPaths)

    # Create the default reader for documents
    self._docReader = documentReader or Domlette.NonvalidatingReader

    self._domimp = implementation or Domlette.implementation

    self._lastOutputParams = None

    # preferred xml-stylesheet PI 'media' pseudo-attr value
    self.mediaPref = None

    # has the "built-in template invoked with params" warning been issued?
    self._builtInWarningGiven = 0

    self.extFunctions = {}  # Cache ext functions to give to the context

    self.extElements = {}
    self.extElements.update(Exslt.ExtElements)
    self.extElements.update(BuiltInExtElements.ExtElements)

    self.extensionParams = {}

    # Defer creation in case the user wants to supply their own
    self._styReader = None

    return
def getStripElements(self):
    """
    Returns the spaceRules of the current stylesheet, or a new empty
    list when no stylesheet has been loaded.
    """
    if not self.stylesheet:
        return []
    return self.stylesheet.spaceRules
def registerExtensionModules(self, moduleList):
    """
    Registers a list of Python modules that have public ExtFunctions
    and/or ExtElements dictionaries.

    In a Python module that contains extension implementations, define
    a dictionary named ExtFunctions (and/or ExtElements) that maps a
    (namespace-URI, local-name) tuple to a direct reference to the
    Python object that implements the extension. To make the extensions
    available to the Processor, call this method, passing in
    ['your.module.name'].

    Empty entries in moduleList are skipped. Each named module is
    imported, and whichever of the two dictionaries it defines is merged
    into the processor's own registries.

    See Ft.Xml.Xslt.Exslt.*, Ft.Xml.Xslt.BuiltInExtFunctions and
    BuiltInExtElements for working examples of extension modules.
    """
    for name in moduleList:
        if not name:
            continue
        module = __import__(name, {}, {}, ['ExtFunctions'])
        registries = (('ExtFunctions', self.extFunctions),
                      ('ExtElements', self.extElements))
        for attr, registry in registries:
            if hasattr(module, attr):
                registry.update(getattr(module, attr))
def registerExtensionFunction(self, namespace, localName, function):
    """
    Register a single extension function.

    Implement the extension as a Python function that takes an
    Ft.Xml.XPath.Context.Context instance as its first argument, then
    call this method with the function's namespace URI, its local name,
    and a direct reference to the Python function. The function is
    stored under the key (namespace, localName), replacing any previous
    entry for that key.

    See also registerExtensionModules().
    """
    key = (namespace, localName)
    self.extFunctions[key] = function
def registerExtensionElement(self, namespace, localName, klass):
    """
    Register a single extension element.

    Implement the extension as a subclass of Ft.Xml.Xslt.XsltElement,
    then call this method with the element's namespace URI, its local
    name, and a direct reference to the implementing class. The class
    is stored under the key (namespace, localName), replacing any
    previous entry for that key.

    See also registerExtensionModules().
    """
    key = (namespace, localName)
    self.extElements[key] = klass
def setDocumentReader(self, docReader):
    """
    Replaces the reader used to parse source document input sources.

    The reader can be anything with a parse() interface that returns a
    DOM tree. It is normally Ft.Xml.Domlette.NonvalidatingReader or
    whatever was given to the Processor constructor; this method is
    sometimes used to switch to Domlette.ValidatingReader.
    """
    self._docReader = docReader
def getDocumentReader(self):
    """
    Returns the reader currently used to parse source document input
    sources.
    """
    reader = self._docReader
    return reader
def setStylesheetReader(self, reader):
    """
    Replaces the reader used for stylesheet document input sources.

    The reader is normally an instance of
    Ft.Xml.Xslt.StylesheetReader.StylesheetReader.
    """
    self._styReader = reader
def getStylesheetReader(self):
    """
    Returns the reader used for stylesheet document input sources.

    If no reader has been set yet, a StylesheetReader is created from
    the processor's alternative stylesheet URIs, remembered, and
    returned.
    """
    reader = self._styReader
    if reader is None:
        reader = StylesheetReader(self._stylesheetAltUris)
        self._styReader = reader
    return reader
def __add_stylesheet(self, stylesheet):
    """
    INTERNAL USE ONLY
    Helper function for adding a stylesheet to the processor.
    If a stylesheet has already been appended, then this method is
    equivalent to having, in an outer "shell" stylesheet, an
    xsl:import for the most recently appended stylesheet followed
    by an xsl:import for the given stylesheet.

    stylesheet - the stylesheet object produced by the stylesheet
        reader. It becomes self.stylesheet if none is installed yet;
        otherwise its children are moved under the existing one.

    Always refreshes self.outputParams from the resulting stylesheet.
    """
    if self.stylesheet:
        # Bump the import index of everything already installed before
        # merging the new children in.
        # NOTE(review): presumably the offset of 1000 keeps the existing
        # children's import precedence distinct from the new ones --
        # confirm against the stylesheet tree implementation.
        for child in self.stylesheet.children:
            child.importIndex += 1000

        # NOTE(review): if appendChild() detaches the child from
        # stylesheet.children, this iterates a list that is being
        # modified -- confirm appendChild() semantics.
        for child in stylesheet.children:
            self.stylesheet.appendChild(child)

        # Re-run reset() and setup() so the merged tree is taken into
        # account.
        self.stylesheet.reset()
        self.stylesheet.setup()
    else:
        self.stylesheet = stylesheet

    self.outputParams = self.stylesheet.outputParams
    return
def appendStylesheet(self, iSrc):
    """
    Append a stylesheet from an InputSource.

    This method establishes the stylesheet that the processor will use
    to do the transformation. If a stylesheet has already been
    appended, then this method is equivalent to having, in an outer
    "shell" stylesheet, an xsl:import for the most recently appended
    stylesheet followed by an xsl:import for the stylesheet accessible
    via the given InputSource.

    The stylesheet is read with the processor's stylesheet reader, which
    is also given the registered extension elements.
    """
    compiled = self.getStylesheetReader().fromSrc(iSrc, self.extElements)
    self.__add_stylesheet(compiled)
def appendStylesheetInstance(self, instance, refUri=''):
    """
    Append an "instant" ("compiled") stylesheet, which is a pickled
    Ft.Xml.Xslt.Stylesheet.Stylesheet object that has had its setup()
    method called already. Such an instance can be obtained from
    another processor, p, as p.stylesheet.root, which you can then
    pickle, save to disk, and reuse in a new processor via this method.

    This method establishes the stylesheet that the processor will use
    to do the transformation. If a stylesheet has already been
    appended, then this method is equivalent to having an xsl:import
    of the new stylesheet in the most recently appended stylesheet.

    An exception will be raised if the same stylesheet is appended more
    than once, just as if the same stylesheet were imported more than
    once.

    refUri is the base URI to assume for the stylesheet. When it is
    empty, the baseUri attribute of instance.root is used if that
    attribute exists.

    Note: Using the instant stylesheet tends to be less efficient than
    using the original document and appendStylesheet(), unless the
    stylesheet is large and complex, like DocBook XSL.
    """
    baseUri = refUri
    # StylesheetTree nodes only ever have baseUri
    # (not baseURI, documentURI, or refUri)
    if not baseUri and hasattr(instance.root, 'baseUri'):
        baseUri = instance.root.baseUri
    compiled = self.getStylesheetReader().fromInstant(
        instance, baseUri=baseUri, is_import=True)
    self.__add_stylesheet(compiled)
def appendStylesheetNode(self, node, refUri='', factory=None):
    """
    Append a stylesheet given as a DOM or Domlette Document node.

    This method establishes the stylesheet that the processor will use
    to do the transformation. If a stylesheet has already been
    appended, then this method is equivalent to having an xsl:import
    of the new stylesheet in the most recently appended stylesheet.

    An exception will be raised if the same stylesheet is appended more
    than once, just as if the same stylesheet were imported more than
    once.

    If the given node has an ownerDocument, that document is what gets
    read; otherwise the node itself is used.

    refUri is the base URI to assume for the stylesheet. It defaults
    to the base URI of the given node.

    The given InputSourceFactory will be used in order to read external
    entities. It defaults to Ft.Xml.InputSource.DefaultFactory.
    """
    owner = node.ownerDocument
    if not owner:
        owner = node
    compiled = self.getStylesheetReader().fromDocument(owner, refUri,
                                                       factory)
    self.__add_stylesheet(compiled)
def run(self, iSrc, ignorePis=0, topLevelParams=None, writer=None, outputStream=None): """ Transform a source document as given via an InputSource.
Assumes that either the Processor instance has already had stylesheets appended (via appendStylesheet(), for example), or the source document contains xml-stylesheet processing instructions that are not being ignored.
The ignorePis flag, if set, will cause xml-stylesheet processing instructions in the source document to be ignored.
The topLevelParams argument is an optional dictionary of stylesheet parameters, the keys of which may be given as strings if they have no namespace, or as (uri, localname) tuples otherwise.
The optional writer argument is a SAX-like event handler that is an Ft.Xml.Xslt.NullWriter subclass. The default writer is either an Ft.Xml.Xslt.XmlWriter, HtmlWriter or PlainTextWriter, depending on the stylesheet(s).
The optional outputStream argument is a Python file-like object to be used as the destination for the writer's output. """ #Update the strip elements #Assume that the ones from XSLT have higher priority ns = self.getStripElements() for s in iSrc.stripElements: ns.append(s) iSrc.stripElements = ns try: src = self._docReader.parse(iSrc) except Exception, e: raise XsltException(Error.SOURCE_PARSE_ERROR, iSrc.uri or '<Python string>', e) if not ignorePis and self.__checkStylesheetPis(src, iSrc): #Do it again with updates WS strip lists
#NOTE: There is a case where this will produce the wrong results. If, there were #previous stylesheets that defined removing white space, then the #processing instruction referenced a stylesheet that overrode some of these #whitespace processing rules, the original trimmed space will be lost
#Regardless, we need to remove any new whitespace defined in the PI self._stripElements(src)
return self.execute(src, iSrc, ignorePis, topLevelParams, writer, outputStream)
def runNode(self, node, sourceUri=None, ignorePis=0, topLevelParams=None,
            writer=None, outputStream=None, preserveSrc=0,
            docInputSource=None):
    """
    Transform a source document as given via a Domlette document node.

    Use Ft.Xml.Domlette.ConvertDocument() to create a Domlette from
    some other type of DOM.

    Assumes that either the Processor instance has already had
    stylesheets appended (via appendStylesheet(), for example), or the
    source document contains xml-stylesheet processing instructions
    that are not being ignored.

    sourceUri - The absolute URI of the document entity that the node
        represents, and should be explicitly provided, even if it is
        available from the node itself.

    ignorePis - (flag) If set, will cause xml-stylesheet processing
        instructions in the source document to be ignored.

    topLevelParams - optional dictionary of stylesheet parameters, the
        keys of which may be given as strings if they have no
        namespace, or as (uri, localname) tuples otherwise.

    writer - optional SAX-like event handler that is an
        Ft.Xml.Xslt.NullWriter subclass. The default writer is either
        an Ft.Xml.Xslt.XmlWriter, HtmlWriter or PlainTextWriter,
        depending on the stylesheet(s).

    outputStream - optional Python file-like object to be used as the
        destination for the writer's output.

    preserveSrc - (flag) If set signals that the source DOM should not
        be mutated, as would normally happen when honoring XSLT
        whitespace stripping requirements. Setting preserveSrc results
        in the creation of a copy of the source DOM.

    docInputSource - optional input source used strictly for further
        resolution relative the given DOM. When omitted, a dummy one
        is built from sourceUri.

    Raises ValueError if node is not a document node.
    """
    if node.nodeType != Node.DOCUMENT_NODE:
        message = MessageSource.g_errorMessages[
            Error.CANNOT_TRANSFORM_FRAGMENT]
        raise ValueError(message)

    # Prefer baseURI, fall back to refUri, else there is no base URI.
    node_baseUri = None
    for attr in ('baseURI', 'refUri'):
        if hasattr(node, attr):
            node_baseUri = getattr(node, attr)
            break

    # A base URI must be absolute, but DOM L3 Load & Save allows
    # implementation-dependent behavior if the URI is actually
    # relative, empty or missing. We'll generate a URN for the
    # InputSource's benefit if the base URI is empty/missing.
    # Relative URIs can pass through; the resolvers will handle
    # them appropriately (we hope).
    if not sourceUri:
        sourceUri = node_baseUri or Uri.BASIC_RESOLVER.generate()

    if preserveSrc:
        # preserve the node's baseURI so our DOM is a true copy
        duplicate = self._domimp.createRootNode(node_baseUri)
        for child in node.childNodes:
            imported = duplicate.importNode(child, 1)
            duplicate.appendChild(imported)
        node = duplicate

    self._stripElements(node)

    if not docInputSource:
        # Create a dummy iSrc
        docInputSource = InputSource.InputSource(
            None, sourceUri, processIncludes=1,
            stripElements=self.getStripElements(),
            factory=self.inputSourceFactory)

    if not ignorePis and self.__checkStylesheetPis(node, docInputSource):
        # Do it again with updated WS strip lists

        # NOTE: There is a case where this will produce the wrong
        # results. If there were previous stylesheets that defined
        # removing white space, then the processing instruction
        # referenced a stylesheet that overrode some of these
        # whitespace processing rules, the original trimmed space
        # will be lost.

        # Regardless, we need to remove any new whitespace defined in
        # the PI
        self._stripElements(node)

    return self.execute(node, docInputSource, ignorePis=ignorePis,
                        topLevelParams=topLevelParams, writer=writer,
                        outputStream=outputStream)
def __cmp_stys(self, a, b): """ Internal function to assist in sorting xml-stylesheet processing instructions. See __checkStylesheetPis(). """ # sort by priority (natural order) return cmp(a[0], b[0]) ## ## For future reference, to support more advanced ## preferences, such as having an ordered list of ## preferred target media values rather than just one, ## and using the Internet media type list in a similar ## fashion, we can sort on multiple pseudo-attrs like ## this: ## ## sort by priority (natural order) #if cmp(a[0], b[0]): # return cmp(a[0], b[0]) ## then media (natural order) #elif cmp(a[1], b[1]): # return cmp(a[1], b[1]) ## then type (XSLT_IMT order) #else: # for imt in XSLT_IMT: # if a[2] == imt: # return b[2] != imt # else: # return -(b[2] == imt)
def __checkStylesheetPis(self, node, inputSource):
    """
    Looks for xml-stylesheet processing instructions that are children
    of the given node's root node, and calls appendStylesheet() for
    each one, unless it does not have an RFC 3023 compliant 'type'
    pseudo-attribute or does not have a 'media' pseudo-attribute that
    matches the preferred media type that was set as
    Processor.mediaPref. Uses the given InputSource to resolve the
    'href' pseudo-attribute. If the instruction has an alternate="yes"
    pseudo-attribute, it is treated as a candidate for the first
    stylesheet only.

    Returns a true value if any stylesheet was appended this way.
    """
    # relevant links:
    # http://www.w3.org/TR/xml-stylesheet/
    # http://lists.fourthought.com/pipermail/4suite/2001-January/001283.html
    # http://lists.fourthought.com/pipermail/4suite/2003-February/005088.html
    # http://lists.fourthought.com/pipermail/4suite/2003-February/005108.html
    #
    # The xml-stylesheet spec defers to HTML 4.0's LINK element
    # for semantics. It is not clear in HTML how the user-agent
    # should interpret multiple LINK elements with rel="stylesheet"
    # and without alternate="yes". In XSLT processing, we, like
    # Saxon, choose to treat such subsequent non-alternates as
    # imports (i.e. each non-alternate stylesheet is imported by
    # the previous one).
    #
    # Given that alternates can appear before or after the
    # non-alternate, there's no way to know whether they apply
    # to the preceding or following non-alternate. So we choose
    # to just treat alternates as only applying to the selection
    # of the first stylesheet.
    #
    # Also, the absence of processing guidelines means we can't
    # know whether to treat the absence of a 'media' pseudo-attr
    # as implying that this is a default stylesheet (e.g. when the
    # preferred media is "foo" and there is no "foo", you use
    # this stylesheet), or whether to treat it as only being the
    # appropriate stylesheet when no media preference is given to
    # the processor.
    #
    # Furthermore, if more than one candidate for the first
    # stylesheet is a match on the 'media' preference (or lack
    # thereof), it's not clear what to do. Do we give preference
    # to the one with a 'type' that is considered more favorable
    # due to its position in the XSLT_IMT list? Do we just use the
    # first one? The last one? For now, if there's one that does
    # not have alternate="yes", we use that one; otherwise we use
    # the first one. Thus, given
    #  <?xml-stylesheet type="application/xslt+xml" href="sty0"?>
    #  <?xml-stylesheet type="application/xslt+xml" href="sty1"
    #    alternate="yes"?>
    # sty0 is used, even if the PIs are swapped; whereas if the
    # only choices are
    #  <?xml-stylesheet type="application/xslt+xml" href="sty1"
    #    alternate="yes"?>
    #  <?xml-stylesheet type="application/xslt+xml" href="sty2"
    #    alternate="yes"?>
    # then sty1 is used because it comes first.
    import re  # local import; only this method needs it

    # A pseudo-attribute is name="value" or name='value', with optional
    # whitespace around the '='. Matching the quoted value as a unit
    # (rather than splitting the PI data on whitespace and '=') keeps
    # values that contain '=' or spaces, e.g. href="sty.xsl?a=1".
    pseudo_attr_re = re.compile(
        r"""([^\s=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')""")

    def parse_pseudo_attrs(data):
        # Build a dict of the pseudo-attrs found in the PI data; a
        # repeated name keeps its last value.
        attrs = {}
        for name, dquoted, squoted in pseudo_attr_re.findall(data):
            attrs[name] = dquoted or squoted
        return attrs

    root = node.rootNode
    c = 1  # count of alternates, +1
    found_nonalt = 0
    stys = []
    for child in root.childNodes:
        # only look at prolog, not anything that comes after it
        if child.nodeType == Node.ELEMENT_NODE:
            break
        if child.nodeType != Node.PROCESSING_INSTRUCTION_NODE:
            continue
        if child.target != 'xml-stylesheet':
            continue

        pseudo_attrs = parse_pseudo_attrs(child.data)

        # PI must have both href, type pseudo-attributes;
        # type pseudo-attr must match valid XSLT types;
        # media pseudo-attr must match preferred media
        # (which can be None)
        if 'href' not in pseudo_attrs or 'type' not in pseudo_attrs:
            continue
        href = pseudo_attrs['href']
        imt = pseudo_attrs['type']
        media = pseudo_attrs.get('media')  # defaults to None
        if media != self.mediaPref or imt not in XSLT_IMT:
            continue

        if pseudo_attrs.get('alternate') == 'yes':
            stys.append((1, media, imt, href))
        elif found_nonalt:
            c += 1
            stys.append((c, media, imt, href))
        else:
            stys.append((0, media, imt, href))
            found_nonalt = 1

    stys.sort(self.__cmp_stys)

    # Assume stylesheets for irrelevant media and disallowed IMTs
    # are filtered out. Assume stylesheets are in ascending order
    # by level. Now just use first stylesheet at each level, but
    # treat levels 0 and 1 the same. Meaning of the levels:
    #  level 0 is first without alternate="yes"
    #  level 1 is all with alternate="yes"
    #  levels 2 and up are the others without alternate="yes"
    hrefs = []
    last_level = -1
    for sty in stys:
        level = sty[0]
        if level == 1 and last_level == 0:
            # we want to ignore level 1s if we had a level 0
            last_level = 1
        if level == last_level:
            # proceed to next level (effectively, we only use
            # the first stylesheet at each level)
            continue
        last_level = level
        hrefs.append(sty[3])

    if hrefs:
        self.getStylesheetReader()._import_index += 1
        for href in hrefs:
            # Resolve the PI with the InputSource for the document
            # containing the PI, so relative hrefs work correctly
            new_source = inputSource.resolve(href, '', 'xml-stylesheet PI')
            self.appendStylesheet(new_source)

    # Return true if any xml-stylesheet PIs were processed
    # (i.e., the stylesheets they reference are going to be used)
    return bool(hrefs)
def execute(self, node, docInputSource, ignorePis=0, topLevelParams=None,
            writer=None, outputStream=None):
    """
    Warning: do not call this method directly unless you know what
    you're doing. If unsure, you probably want the runNode method.

    Runs the stylesheet processor against the given XML DOM node with
    the stylesheets that have been registered. It does not mutate the
    source. If writer is given, it is used in place of the default
    output method decisions for choosing the proper writer.

    node - the source tree node to transform.
    docInputSource - input source for the source document; its uri is
        passed to the stylesheet and the context.
    ignorePis - accepted for signature compatibility; not read here.
    topLevelParams - optional dictionary of top-level parameters. A
        copy is normalized, so the caller's dictionary is not modified.
    writer - optional output writer; when omitted, an OutputHandler
        is created.
    outputStream - optional file-like destination for that
        OutputHandler.

    Returns the output as a string when neither writer nor
    outputStream was supplied; otherwise returns u"" and the caller
    retrieves the result from the writer or stream.

    Raises XsltException(Error.NO_STYLESHEET) if no stylesheet has
    been appended.
    """
    #QUESTION: What about ws stripping?
    #ANSWER: Whitespace stripping happens only in the run*() interfaces.
    #  This method is use-at-your-own-risk. The XSLT conformance of the
    #  source is maintained by the caller. This exists as a performance
    #  hook.
    topLevelParams = topLevelParams or {}

    # Per-run state, reset at the start of every transformation
    self.attributeSets = {}
    self.keys = {}

    #See f:chain-to extension element
    self.chainTo = None
    self.chainParams = None

    if not self.stylesheet:
        raise XsltException(Error.NO_STYLESHEET)

    # Use an internal stream to gather the output only if the caller
    # didn't supply other means of retrieving it.
    internalStream = writer is None and outputStream is None

    if not writer:
        # Use OutputHandler to determine the real writer to use.
        outputStream = outputStream or cStringIO.StringIO()
        writer = OutputHandler.OutputHandler(self.outputParams,
                                             outputStream,
                                             self.__writerChanged)
    # Start a fresh writer stack holding only the top-level writer
    self.writers = [writer]

    # Setup the named templates
    self._namedTemplates = self.stylesheet.getNamedTemplates()

    # Initialize any stylesheet parameters
    tlp = topLevelParams.copy()
    self._normalizeParams(tlp)
    self._documentInputSource = docInputSource

    # Prepare the stylesheet for processing
    self.stylesheet.primeStylesheet(node, self, tlp, docInputSource.uri)

    # Create the context used for processing
    variables = self.stylesheet.getGlobalVariables()
    functions = self.stylesheet.getInitialFunctions()
    context = XsltContext.XsltContext(node, 1, 1, None,
                                      varBindings=variables,
                                      processor=self,
                                      extFunctionMap=functions)
    context.documents.update(self.stylesheet.root.sourceNodes)
    context.addDocument(node, docInputSource.uri)

    # Process the document
    # (self.writers[-1] is looked up each time because the writer on
    # top of the stack can be replaced via __writerChanged)
    self.writers[-1].startDocument()
    self.applyTemplates(context)
    self.writers[-1].endDocument()

    # Perform cleanup
    # NOTE(review): none of the cleanup below runs if an exception
    # escapes the processing above.
    self.stylesheet.idle(node, self, docInputSource.uri)

    #How does this contrast with access to self.outputParams ?
    self._lastOutputParams = self.writers[-1]._outputParams

    # Empty the writer stack in place
    del self.writers[:]

    if internalStream:
        # Get the result from our cStringIO 'stream'.
        result = outputStream.getvalue()
    else:
        # It is the callers responsibility to get the result
        result = u""
    return result
def applyTemplates(self, context, params=None):
    """
    Intended to be used by XSLT instruction implementations only.

    Implements the xsl:apply-templates instruction by attempting to
    let the stylesheet apply its own template for the given context.
    If the stylesheet does not have a matching template, the built-in
    templates are invoked: text and attribute nodes are written out as
    text, and the children of element and document nodes are processed
    recursively. Other node types produce nothing.

    context is an XsltContext instance.
    params is a dictionary of parameters being passed in, defaulting
    to None. The first time built-in templates are reached with
    non-empty params, a warning is issued.
    """
    params = params or {}
    if self.stylesheet.applyTemplates(context, self, params):
        # The stylesheet had a matching template; nothing left to do.
        return

    # No matching templates found, use builtin templates
    if params and not self._builtInWarningGiven:
        self.warning(MessageSource.BUILTIN_TEMPLATE_WITH_PARAMS)
        self._builtInWarningGiven = 1

    current = context.node
    kind = current.nodeType
    if kind == Node.TEXT_NODE:
        self.writers[-1].text(current.data)
    elif kind == Node.ATTRIBUTE_NODE:
        self.writers[-1].text(current.value)
    elif kind in (Node.ELEMENT_NODE, Node.DOCUMENT_NODE):
        # Walk the children with the shared context object, then put
        # the context back the way it was.
        saved = context.copy()
        children = current.childNodes
        total = len(children)
        position = 0
        for child in children:
            position += 1
            context.node = child
            context.position = position
            context.size = total
            self.applyTemplates(context)
        context.set(saved)
    return
def xslMessage(self, msg):
    """
    Intended to be used by XSLT instruction implementations only.

    Used by xsl:message to emit a message to sys.stderr, unless such
    messages are suppressed (see messageControl()). The message is
    written between the msgPrefix and msgSuffix instance attributes,
    and the stream is flushed afterwards.
    """
    if self._suppressMsgs:
        return
    stream = sys.stderr
    for part in (self.msgPrefix, msg, self.msgSuffix):
        stream.write(part)
    stream.flush()
    return
#FIXME: make this _warning?
def warning(self, message):
    """
    Emits a warning via Python's warnings framework, unless warnings
    are suppressed (see messageControl()).

    Used, for example, to announce that built-in templates are being
    invoked with params.
    """
    if self._suppressMsgs:
        return
    # stacklevel=2 attributes the warning to the caller of this method,
    # which is where the situation being warned about arose.
    warnings.warn(message, stacklevel=2)
    return
def messageControl(self, suppress):
    """
    Turns the processor's warnings and xsl:message output on or off.

    Call with suppress=1 (any true value) to silence such output, or
    with a false value to enable it again.
    """
    self._suppressMsgs = suppress
def __writerChanged(self, newWriter): """ Private callback function to allow the OutputHandler to replace itself in the writer stack. See addHandler(). """ self.writers[-1] = newWriter return
def addHandler(self, outputParams, stream):
    """
    Intended to be used by XSLT instruction implementations only.

    Sets up the processor to start processing subsequently generated
    content with an output writer wrapper that first determines which
    XSLT output method is going to be used (i.e., by looking at the
    output parameters or waiting to see if an 'html' element is the
    first new node generated), then replaces itself with the
    appropriate writer instance.

    outputParams is an Ft.Xml.Xslt.OutputParameters instance.

    stream will be passed on to the constructor of the real writer.

    The wrapper is pushed onto the writer stack before its
    startDocument() is called.
    """
    wrapper = OutputHandler.OutputHandler(outputParams, stream,
                                          self.__writerChanged)
    stack = self.writers
    stack.append(wrapper)
    wrapper.startDocument()
def removeHandler(self):
    """
    Intended to be used by XSLT instruction implementations only.

    Ends the document on the most recently added output writer and
    then deletes whatever writer is on top of the stack.
    """
    stack = self.writers
    stack[-1].endDocument()
    # Delete by position, after endDocument() has run, rather than
    # removing the object captured above.
    del stack[-1]
def pushResultTree(self, baseUri, implementation=None):
    """
    Intended to be used by XSLT instruction implementations only.

    Sets up the processor to start processing subsequently generated
    content with a new output writer that produces a separate
    document. The new document will have the given baseUri as its URI.
    This is used to generate result tree fragments.

    Allows specifying an alternative DOM implementation for the
    creation of the new document; the processor's own implementation
    is used when none is given.
    """
    dom_impl = implementation or self._domimp
    rtf_writer = RtfWriter.RtfWriter(self.outputParams, baseUri, dom_impl)
    self.writers.append(rtf_writer)
def pushResultString(self):
    """
    Intended to be used by XSLT instruction implementations only.

    Sets up the processor to start processing subsequently generated
    content with an output writer that buffers the text from text
    events and keeps track of whether non-text events occurred. This
    is used by the implementations of XSLT instructions such as
    xsl:attribute.
    """
    self.writers.append(StringWriter.StringWriter(self.outputParams))
def pushResult(self, handler=None):
    """
    Intended to be used by XSLT instruction implementations only.

    Sets up the processor to start processing subsequently generated
    content with a new output writer (the given handler of SAX-like
    output events). The handler is pushed onto the writer stack and
    its startDocument() is called.

    Calling this without a handler is deprecated: a DeprecationWarning
    is issued and an RtfWriter based on the stylesheet's base URI is
    used instead.
    """
    if handler is None:
        warnings.warn("Use pushResultTree(uri) to create RTFs",
                      category=DeprecationWarning, stacklevel=2)
        handler = RtfWriter.RtfWriter(self.outputParams,
                                      self.stylesheet.baseUri)
    stack = self.writers
    stack.append(handler)
    handler.startDocument()
def popResult(self):
    """
    Intended to be used by XSLT instruction implementations only.

    Ends temporary output writing that was started with
    pushResultString(), pushResultTree(), or pushResult(): the top
    writer is taken off the stack, its document is ended, and its
    result is returned.
    """
    finished = self.writers.pop()
    finished.endDocument()
    result = finished.getResult()
    return result
def output(self):
    """
    Deprecated accessor for the current output writer; issues a
    DeprecationWarning and returns self.writer.
    """
    warnings.warn("output() deprecated; use writer",
                  category=DeprecationWarning, stacklevel=2)
    current = self.writer
    return current
def writer(self):
    """
    Intended to be used by XSLT instruction implementations only.

    Returns the current output writer, i.e. the one on top of the
    writer stack.
    """
    stack = self.writers
    return stack[-1]

# Exposed as a read-only attribute rather than a method.
writer = property(writer)
def _stripElements(self, node):
    """
    Runs StripElements.StripElements() on the given node using the
    rules returned by getStripElements().
    """
    rules = self.getStripElements()
    StripElements.StripElements(node, rules)
def _normalizeParams(self, params): """ params is a dictionary of top-level parameters. The main task is to check this dictionary for lists of strings and convert these to a node set of text nodes """
def to_unicode(s): try: # Try UTF-8 return unicode(s, 'UTF-8') except ValueError: # Use encoding from locale try: return unicode(s, DEFAULT_ENCODING) except ValueError: #FIXME: l10n raise ValueError( "String parameters must be Unicode objects or " "strings encoded as UTF-8 or %s." % DEFAULT_ENCODING)
for k, v in params.items(): if v: if isinstance(v, str): params[k] = to_unicode(v) elif isinstance(v, list) and isinstance(v[0], (str, unicode)): doc = self._domimp.createRootNode(self.stylesheet.baseUri) nodeset = [] for text in v: if isinstance(text, str): text = to_unicode(text) nodeset.append(doc.createTextNode(text)) params[k] = nodeset return
def reset(self):
    """
    Returns the processor to a state where it can be used to do a new
    transformation with a new stylesheet. Drops the current stylesheet
    tree and resets the stylesheet reader.
    """
    self.stylesheet = None
    reader = self.getStylesheetReader()
    reader.reset()
|