""" SAX driver for the pyexpat C module. This driver works with pyexpat.__version__ == '2.22'. """ version = "0.20" from xml.sax._exceptions import * from xml.sax.handler import feature_validation, feature_namespaces from xml.sax.handler import feature_namespace_prefixes from xml.sax.handler import feature_external_ges, feature_external_pes from xml.sax.handler import feature_string_interning from xml.sax.handler import property_xml_string, property_interning_dict # xml.parsers.expat does not raise ImportError in Jython import sys if sys.platform[:4] == "java": raise SAXReaderNotAvailable("expat not available in Java", None) del sys try: from xml.parsers import expat except ImportError: raise SAXReaderNotAvailable("expat not supported", None) else: if not hasattr(expat, "ParserCreate"): raise SAXReaderNotAvailable("expat not supported", None) from xml.sax import xmlreader, saxutils, handler AttributesImpl = xmlreader.AttributesImpl AttributesNSImpl = xmlreader.AttributesNSImpl # If we're using a sufficiently recent version of Python, we can use # weak references to avoid cycles between the parser and content # handler, otherwise we'll just have to pretend. try: import _weakref except ImportError: def _mkproxy(o): return o else: import weakref _mkproxy = weakref.proxy del weakref, _weakref # --- ExpatLocator class ExpatLocator(xmlreader.Locator): """Locator for use with the ExpatParser class. This uses a weak reference to the parser object to avoid creating a circular reference between the parser and the content handler. """ def __init__(self, parser): self._ref = _mkproxy(parser) def getColumnNumber(self): parser = self._ref if parser._parser is None: return None return parser._parser.ErrorColumnNumber def getLineNumber(self): parser = self._ref if parser._parser is None: return 1 return parser._parser.ErrorLineNumber def getPublicId(self): parser = self._ref if parser is None: return None return parser._source.getPublicId() def getSystemId(self): parser = self._ref if parser is None: return None return parser._source.getSystemId() # --- ExpatParser class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): """SAX driver for the pyexpat C module.""" def __init__(self, namespaceHandling=0, bufsize=2**16-20): xmlreader.IncrementalParser.__init__(self, bufsize) self._source = xmlreader.InputSource() self._parser = None self._namespaces = namespaceHandling self._lex_handler_prop = None self._parsing = 0 self._entity_stack = [] self._external_ges = 1 self._interning = None self._namespace_prefixes = 1 # XMLReader methods def parse(self, source): "Parse an XML document from a URL or an InputSource." source = saxutils.prepare_input_source(source) self._source = source self.reset() self._cont_handler.setDocumentLocator(ExpatLocator(self)) try: xmlreader.IncrementalParser.parse(self, source) finally: # Drop reference to Expat parser, but read potential # error state before that. Also, if close has completed, # we don't have a parser anymore, anyway. if self._parser: self._ColumnNumber = self._parser.ErrorColumnNumber self._LineNumber = self._parser.ErrorLineNumber self._parser = None def prepareParser(self, source): if source.getSystemId() != None: self._parser.SetBase(source.getSystemId()) # Redefined setContentHandler to allow changing handlers during parsing def setContentHandler(self, handler): xmlreader.IncrementalParser.setContentHandler(self, handler) if self._parsing: self._reset_cont_handler() def getFeature(self, name): if name == feature_namespaces: return self._namespaces elif name == feature_string_interning: return self._interning is not None elif name == feature_namespace_prefixes: return self._namespace_prefixes elif name in (feature_validation, feature_external_pes): return 0 elif name == feature_external_ges: return self._external_ges raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): if self._parsing: raise SAXNotSupportedException("Cannot set features while parsing") if name == feature_namespaces: self._namespaces = state elif name == feature_external_ges: self._external_ges = state elif name == feature_string_interning: if state: if self._interning is None: self._interning = {} else: self._interning = None elif name == feature_namespace_prefixes: self._namespace_prefixes = state elif name == feature_validation: if state: raise SAXNotSupportedException( "expat does not support validation") elif name == feature_external_pes: if state: raise SAXNotSupportedException( "expat does not read external parameter entities") else: raise SAXNotRecognizedException( "Feature '%s' not recognized" % name) def getProperty(self, name): if name == handler.property_lexical_handler: return self._lex_handler_prop elif name == property_interning_dict: return self._interning elif name == property_xml_string: if self._parser: if hasattr(self._parser, "GetInputContext"): return self._parser.GetInputContext() else: raise SAXNotRecognizedException( "This version of expat does not support getting" " the XML string") else: raise SAXNotSupportedException( "XML string cannot be returned when not parsing") raise SAXNotRecognizedException("Property '%s' not recognized" % name) def setProperty(self, name, value): if name == handler.property_lexical_handler: self._lex_handler_prop = value if self._parsing: self._reset_lex_handler_prop() elif name == property_interning_dict: self._interning = value elif name == property_xml_string: raise SAXNotSupportedException("Property '%s' cannot be set" % name) else: raise SAXNotRecognizedException("Property '%s' not recognized" % name) # IncrementalParser methods def feed(self, data, isFinal = 0): if not self._parsing: self.reset() self._parsing = 1 self._cont_handler.startDocument() try: # The isFinal parameter is internal to the expat reader. # If it is set to true, expat will check validity of the entire # document. When feeding chunks, they are not normally final - # except when invoked from close. self._parser.Parse(data, isFinal) except expat.error, e: exc = SAXParseException(expat.ErrorString(e.code), e, self) # FIXME: when to invoke error()? self._err_handler.fatalError(exc) def close(self): if self._entity_stack: # If we are completing an external entity, do nothing here return self.feed("", isFinal = 1) self._cont_handler.endDocument() self._parsing = 0 # break cycle created by expat handlers pointing to our methods self._parser = None def _reset_cont_handler(self): self._parser.ProcessingInstructionHandler = \ self._cont_handler.processingInstruction self._parser.CharacterDataHandler = self._cont_handler.characters def _reset_lex_handler_prop(self): lex = self._lex_handler_prop parser = self._parser if lex is None: parser.CommentHandler = None parser.StartCdataSectionHandler = None parser.EndCdataSectionHandler = None parser.StartDoctypeDeclHandler = None parser.EndDoctypeDeclHandler = None else: parser.CommentHandler = lex.comment parser.StartCdataSectionHandler = lex.startCDATA parser.EndCdataSectionHandler = lex.endCDATA parser.StartDoctypeDeclHandler = self.start_doctype_decl parser.EndDoctypeDeclHandler = lex.endDTD def reset(self): if self._namespaces: self._parser = expat.ParserCreate(None, " ", intern=self._interning) self._parser.namespace_prefixes = 1 self._parser.StartElementHandler = self.start_element_ns self._parser.EndElementHandler = self.end_element_ns else: self._parser = expat.ParserCreate(intern = self._interning) self._parser.StartElementHandler = self.start_element self._parser.EndElementHandler = self.end_element self._reset_cont_handler() self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl self._parser.NotationDeclHandler = self.notation_decl self._parser.StartNamespaceDeclHandler = self.start_namespace_decl self._parser.EndNamespaceDeclHandler = self.end_namespace_decl self._decl_handler_prop = None if self._lex_handler_prop: self._reset_lex_handler_prop() # self._parser.DefaultHandler = # self._parser.DefaultHandlerExpand = # self._parser.NotStandaloneHandler = self._parser.ExternalEntityRefHandler = self.external_entity_ref try: self._parser.SkippedEntityHandler = self.skipped_entity_handler except AttributeError: # This pyexpat does not support SkippedEntity pass self._parser.SetParamEntityParsing( expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) self._parsing = 0 self._entity_stack = [] # default values when _parser goes aways self._ColumnNumber = None self._LineNumber = 1 # Locator methods def getColumnNumber(self): if self._parser is None: return self._ColumnNumber return self._parser.ErrorColumnNumber def getLineNumber(self): if self._parser is None: return self._LineNumber return self._parser.ErrorLineNumber def getPublicId(self): return self._source.getPublicId() def getSystemId(self): return self._source.getSystemId() # event handlers def start_element(self, name, attrs): self._cont_handler.startElement(name, AttributesImpl(attrs)) def end_element(self, name): self._cont_handler.endElement(name) def start_element_ns(self, name, attrs): pair = name.split() if len(pair) == 1: # no namespace elem_qname = name pair = (None, name) elif len(pair) == 3: # namespace plus prefix elem_qname = "%s:%s" % (pair[2], pair[1]) pair = pair[0], pair[1] else: # default namespace elem_qname = pair[1] pair = tuple(pair) newattrs = {} qnames = {} for (aname, value) in attrs.items(): parts = aname.split() length = len(parts) if length == 1: # no namespace qname = aname apair = (None, aname) elif length == 3: qname = "%s:%s" % (parts[2], parts[1]) apair = parts[0], parts[1] else: # default namespace qname = parts[1] apair = tuple(parts) newattrs[apair] = value qnames[apair] = qname self._cont_handler.startElementNS(pair, elem_qname, AttributesNSImpl(newattrs, qnames)) def end_element_ns(self, name): pair = name.split() if len(pair) == 1: pair = (None, name) elif len(pair) == 3: pair = pair[0], pair[1] else: pair = tuple(pair) self._cont_handler.endElementNS(pair, None) # this is not used (call directly to ContentHandler) def processing_instruction(self, target, data): self._cont_handler.processingInstruction(target, data) # this is not used (call directly to ContentHandler) def character_data(self, data): self._cont_handler.characters(data) def start_namespace_decl(self, prefix, uri): self._cont_handler.startPrefixMapping(prefix, uri) def end_namespace_decl(self, prefix): self._cont_handler.endPrefixMapping(prefix) def start_doctype_decl(self, name, sysid, pubid, has_internal_subset): self._lex_handler_prop.startDTD(name, pubid, sysid) def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) def notation_decl(self, name, base, sysid, pubid): self._dtd_handler.notationDecl(name, pubid, sysid) def external_entity_ref(self, context, base, sysid, pubid): if not self._external_ges: return 1 source = self._ent_handler.resolveEntity(pubid, sysid) source = saxutils.prepare_input_source(source, self._source.getSystemId() or "") self._entity_stack.append((self._parser, self._source)) self._parser = self._parser.ExternalEntityParserCreate(context) self._source = source try: xmlreader.IncrementalParser.parse(self, source) except: return 0 # FIXME: save error info here? (self._parser, self._source) = self._entity_stack[-1] del self._entity_stack[-1] return 1 def skipped_entity_handler(self, name, is_pe): if is_pe: # The SAX spec requires to report skipped PEs with a '%' name = '%'+name self._cont_handler.skippedEntity(name) # --- def create_parser(*args, **kwargs): return apply(ExpatParser, args, kwargs) # --- if __name__ == "__main__": import xml.sax p = create_parser() p.setContentHandler(xml.sax.XMLGenerator()) p.setErrorHandler(xml.sax.ErrorHandler()) p.parse("../../../hamlet.xml")