# -*- coding: utf-8 -*-
# Parser for XPath in -*- python -*-, as defined in REC-xpath-19991116
# Copyright 2000, Martin v. Löwis

# This parser is generated by Amit J Patel's YAPPS
# http://theory.stanford.edu/~amitp/Yapps/ for documentation and updates

# The generated Scanner class is not used, and redefined at the end.
# Therefore, the token definitions are for illustration only, and to
# let YAPPS know what the tokens are.

# The grammar rules attempt to follow the XPath recommendation closely,
# both in textual order and presentation. The following changes have been
# made:
# - left-recursion was replaced with right-recursion
# - left-factorization was applied where necessary
# - semantic values were attached to non-terminals

from string import *
import re
from yappsrt import *

# Lookahead sets shared by several rules below.  The generator emitted the
# same literal lists inline in each rule; they are named here once.  A rule
# takes its empty alternative only when the next token is NOT in its set.
_STEP_LOOKAHEAD = [
    "'@'", "'::'", 'Literal', 'Number', 'VariableReference', 'NodeType',
    'AxisName', 'NCName', 'NCNameStar', 'QName', 'LPAREN', 'STAR',
    'LBRACKET', 'FunctionName', 'DOT', 'DOTDOT', 'ID', 'KEY',
]
_PATH_LOOKAHEAD = _STEP_LOOKAHEAD + ["'//'", "'/'"]
_UNION_LOOKAHEAD = _PATH_LOOKAHEAD + ['BAR']
# Tokens that can never start an argument expression.
_OPERATOR_LOOKAHEAD = [
    "'mod'", "'div'", "'>='", "'>'", "'<='", "'<'", "'!='", "'='",
    "'and'", "'or'", "','", "'::'", 'MultiplyOperator', 'PLUS',
    'LBRACKET', 'RBRACKET', 'BAR', 'END',
]
# Same as above, plus RPAREN: tokens that can never start a unary expression.
_UNARY_STOP = _OPERATOR_LOOKAHEAD + ['RPAREN']


class XPathScanner(Scanner):
    """Scanner generated by YAPPS from the grammar's token declarations.

    This class is rebound to a hand-written scanner further down in the
    file; per the header comment the patterns are for illustration only,
    so they are kept exactly as generated.
    """

    patterns = [
        ("'mod'", re.compile('mod')),
        ("'div'", re.compile('div')),
        ("'-'", re.compile('-')),
        ("'>='", re.compile('>=')),
        ("'>'", re.compile('>')),
        ("'<='", re.compile('<=')),
        ("'<'", re.compile('<')),
        ("'!='", re.compile('!=')),
        ("'='", re.compile('=')),
        ("'and'", re.compile('and')),
        ("'or'", re.compile('or')),
        ("','", re.compile(',')),
        ("'@'", re.compile('@')),
        ("'::'", re.compile('::')),
        ("'//'", re.compile('//')),
        ("'/'", re.compile('/')),
        ('Literal', re.compile('"[^"]*"|\'[^\']*')),
        ('Number', re.compile('\\d+(.\\d*)?|.\\d+')),
        ('VariableReference', re.compile('\\$[a-zA-Z_][:a-zA-Z0-9_.-]*')),
        ('NodeType', re.compile('comment|text|processing-instruction|node')),
        ('AxisName', re.compile('ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self')),
        ('NCName', re.compile('[a-zA-Z_][a-zA-Z0-9_.-]*')),
        ('NCNameStar', re.compile('[a-zA-Z_][a-zA-Z0-9_.-]*:\\*')),
        ('QName', re.compile('[a-zA-Z_][a-zA-Z0-9_.-]*(:[a-zA-Z_][a-zA-Z0-9_.-])?')),
        ('MultiplyOperator', re.compile('\\*')),
        ('LPAREN', re.compile('\\(')),
        ('RPAREN', re.compile('\\)')),
        ('STAR', re.compile('\\*')),
        ('PLUS', re.compile('\\+')),
        ('LBRACKET', re.compile('\\[')),
        ('RBRACKET', re.compile('\\]')),
        ('FunctionName', re.compile('[a-zA-Z_][a-zA-Z0-9_.-]*(:[a-zA-Z_][a-zA-Z0-9_.-]*)?')),
        ('DOT', re.compile('\\.')),
        ('DOTDOT', re.compile('\\.\\.')),
        ('BAR', re.compile('\\|')),
        ('END', re.compile('#')),
        ('ID', re.compile('id')),
        ('KEY', re.compile('key')),
    ]

    def __init__(self, str):
        """Initialise the yappsrt Scanner over the input string ``str``."""
        Scanner.__init__(self, None, [], str)


class XPath(Parser):
    """Recursive-descent XPath parser generated by YAPPS.

    Each method parses one grammar rule.  Semantic actions are invoked as
    attributes of ``self`` (``self.step``, ``self.rlp``, ``self.anMap``,
    ``self.OR``, ...); this class does not define them -- the subclass at
    the end of the file supplies them.  Methods named ``Xs(self, v)`` are
    the right-recursive tails of the corresponding rule ``X``; ``v`` is the
    value built so far.  Every rule raises ``SyntaxError(pos, msg)`` when
    the lookahead token fits none of its alternatives.
    """

    def Start(self):
        """Start ::= LocationPath END."""
        location_path = self.LocationPath()
        self._scan('END')
        return location_path

    def FullExpr(self):
        """FullExpr ::= Expr END."""
        expr = self.Expr()
        self._scan('END')
        return expr

    def LocationPath(self):
        """LocationPath ::= RelativeLocationPath | AbsoluteLocationPath."""
        tok = self._peek()
        if tok in ['AxisName', 'NodeType', 'DOT', 'DOTDOT', "'@'", 'STAR',
                   'QName', 'NCNameStar', 'NCName']:
            return self.RelativeLocationPath()
        elif tok in ["'/'", "'//'"]:
            return self.AbsoluteLocationPath()
        else:
            raise SyntaxError(self._pos, 'Could not match LocationPath')

    def AbsoluteLocationPath(self):
        """AbsoluteLocationPath ::= '/' OptRelativeLocationPath
        | AbbreviatedAbsoluteLocationPath."""
        tok = self._peek()
        if tok == "'/'":
            self._scan("'/'")
            opt_path = self.OptRelativeLocationPath()
            return self.absoluteLocationPath(opt_path)
        elif tok == "'//'":
            return self.AbbreviatedAbsoluteLocationPath()
        else:
            raise SyntaxError(self._pos,
                              'Could not match AbsoluteLocationPath')

    def OptRelativeLocationPath(self):
        """Return a RelativeLocationPath, or None if none starts here."""
        tok = self._peek()
        if tok not in _PATH_LOOKAHEAD:
            return None
        elif tok not in ["'::'", "'//'", "'/'", 'Literal', 'Number',
                         'VariableReference', 'LPAREN', 'LBRACKET',
                         'FunctionName', 'ID', 'KEY']:
            return self.RelativeLocationPath()
        else:
            raise SyntaxError(self._pos,
                              'Could not match OptRelativeLocationPath')

    def RelativeLocationPath(self):
        """RelativeLocationPath ::= Step RelativeLocationPaths."""
        step = self.Step()
        return self.RelativeLocationPaths(step)

    def RelativeLocationPaths(self, v):
        """Tail of RelativeLocationPath: ( '/' Step | '//' Step )*."""
        tok = self._peek()
        if tok not in _PATH_LOOKAHEAD:
            return v
        elif tok == "'/'":
            self._scan("'/'")
            step = self.Step()
            return self.RelativeLocationPaths(self.rlp(v, step))
        elif tok == "'//'":
            self._scan("'//'")
            step = self.Step()
            return self.RelativeLocationPaths(self.arlp(v, step))
        else:
            raise SyntaxError(self._pos,
                              'Could not match RelativeLocationPaths')

    def Step(self):
        """Step ::= AxisSpecifier NodeTest Predicates | AbbreviatedStep."""
        tok = self._peek()
        if tok in ['AxisName', 'NodeType', "'@'", 'STAR', 'QName',
                   'NCNameStar', 'NCName']:
            axis = self.AxisSpecifier()
            node_test = self.NodeTest()
            predicates = self.Predicates()
            return self.step(axis, node_test, predicates)
        elif tok in ['DOT', 'DOTDOT']:
            return self.AbbreviatedStep()
        else:
            raise SyntaxError(self._pos, 'Could not match Step')

    def Predicates(self):
        """Predicates ::= Predicate*; returns a (possibly empty) list."""
        tok = self._peek()
        if tok not in _STEP_LOOKAHEAD:
            return []
        elif tok == 'LBRACKET':
            first = self.Predicate()
            rest = self.Predicates()
            return [first] + rest
        else:
            raise SyntaxError(self._pos, 'Could not match Predicates')

    def AxisSpecifier(self):
        """AxisSpecifier ::= AxisName '::' | AbbreviatedAxisSpecifier."""
        tok = self._peek()
        if tok == 'AxisName':
            axis_name = self._scan('AxisName')
            self._scan("'::'")
            return self.axisSpecifier(self.anMap[axis_name])
        elif tok in ["'@'", 'NodeType', 'STAR', 'QName', 'NCNameStar',
                     'NCName']:
            return self.AbbreviatedAxisSpecifier()
        else:
            raise SyntaxError(self._pos, 'Could not match AxisSpecifier')

    def NodeTest(self):
        """NodeTest ::= NameTest | NodeType '(' OptLiteral ')'."""
        tok = self._peek()
        if tok in ['STAR', 'QName', 'NCNameStar', 'NCName']:
            return self.NameTest()
        elif tok == 'NodeType':
            node_type = self._scan('NodeType')
            self._scan('LPAREN')
            opt_literal = self.OptLiteral()
            self._scan('RPAREN')
            return self.mkNodeTest(node_type, opt_literal)
        else:
            raise SyntaxError(self._pos, 'Could not match NodeTest')

    def OptLiteral(self):
        """Return the scanned Literal, or None if ')' follows directly."""
        tok = self._peek()
        if tok == 'RPAREN':
            return None
        elif tok == 'Literal':
            return self._scan('Literal')
        else:
            raise SyntaxError(self._pos, 'Could not match OptLiteral')

    def NameTest(self):
        """NameTest ::= '*' | QName | NCName ':' '*' | NCName."""
        tok = self._peek()
        if tok == 'STAR':
            self._scan('STAR')
            return self.nameTest(None, "*")
        elif tok == 'QName':
            qname = self._scan('QName')
            return self.mkQName(qname)
        elif tok == 'NCNameStar':
            ncname_star = self._scan('NCNameStar')
            # Drop the trailing ":*" to obtain the prefix.
            return self.nameTest(ncname_star[:-2], '*')
        elif tok == 'NCName':
            ncname = self._scan('NCName')
            return self.nameTest(None, ncname)
        else:
            raise SyntaxError(self._pos, 'Could not match NameTest')

    def Predicate(self):
        """Predicate ::= '[' PredicateExpr ']'."""
        self._scan('LBRACKET')
        predicate_expr = self.PredicateExpr()
        self._scan('RBRACKET')
        return predicate_expr

    def PredicateExpr(self):
        """PredicateExpr ::= Expr."""
        return self.Expr()

    def AbbreviatedAbsoluteLocationPath(self):
        """AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath."""
        self._scan("'//'")
        relative_path = self.RelativeLocationPath()
        return self.aalp(relative_path)

    def AbbreviatedStep(self):
        """AbbreviatedStep ::= '.' | '..' (action argument 0 resp. 1)."""
        tok = self._peek()
        if tok == 'DOT':
            self._scan('DOT')
            return self.abbreviatedStep(0)
        elif tok == 'DOTDOT':
            self._scan('DOTDOT')
            return self.abbreviatedStep(1)
        else:
            raise SyntaxError(self._pos, 'Could not match AbbreviatedStep')

    def AbbreviatedAxisSpecifier(self):
        """AbbreviatedAxisSpecifier ::= '@'?  (empty means the child axis).

        ``pyxpath`` is imported further down in this file; the name is
        resolved when the method runs, not when the class is defined.
        """
        tok = self._peek()
        if tok in ['NodeType', 'STAR', 'QName', 'NCNameStar', 'NCName']:
            return self.axisSpecifier(pyxpath.CHILD_AXIS)
        elif tok == "'@'":
            self._scan("'@'")
            return self.axisSpecifier(pyxpath.ATTRIBUTE_AXIS)
        else:
            raise SyntaxError(self._pos,
                              'Could not match AbbreviatedAxisSpecifier')

    def Expr(self):
        """Expr ::= OrExpr."""
        return self.OrExpr()

    def PrimaryExpr(self):
        """PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal
        | Number | FunctionCall."""
        tok = self._peek()
        if tok == 'VariableReference':
            var_ref = self._scan('VariableReference')
            return self.mkVariableReference(var_ref)
        elif tok == 'LPAREN':
            self._scan('LPAREN')
            expr = self.Expr()
            self._scan('RPAREN')
            return expr
        elif tok == 'Literal':
            literal = self._scan('Literal')
            return self.literal(literal)
        elif tok == 'Number':
            number = self._scan('Number')
            return self.number(number)
        elif tok in ['FunctionName', 'ID', 'KEY']:
            return self.FunctionCall()
        else:
            raise SyntaxError(self._pos, 'Could not match PrimaryExpr')

    def FunctionCall(self):
        """FunctionCall ::= (FunctionName | 'id' | 'key') '(' Arguments ')'."""
        tok = self._peek()
        if tok == 'FunctionName':
            function_name = self._scan('FunctionName')
            self._scan('LPAREN')
            arguments = self.Arguments()
            self._scan('RPAREN')
            return self.mkFunctionCall(function_name, arguments)
        elif tok == 'ID':
            self._scan('ID')
            self._scan('LPAREN')
            arguments = self.Arguments()
            self._scan('RPAREN')
            return self.functionCall(None, 'id', arguments)
        elif tok == 'KEY':
            self._scan('KEY')
            self._scan('LPAREN')
            arguments = self.Arguments()
            self._scan('RPAREN')
            return self.functionCall(None, 'key', arguments)
        else:
            raise SyntaxError(self._pos, 'Could not match FunctionCall')

    def Arguments(self):
        """Arguments ::= ( Argument ( ',' Argument )* )?; returns a list."""
        tok = self._peek()
        if tok == 'RPAREN':
            return []
        elif tok not in _OPERATOR_LOOKAHEAD:
            argument = self.Argument()
            return self.KommaArguments([argument])
        else:
            raise SyntaxError(self._pos, 'Could not match Arguments')

    def KommaArguments(self, v):
        """Tail of Arguments: ( ',' Argument )*, appended to list ``v``."""
        tok = self._peek()
        if tok == 'RPAREN':
            return v
        elif tok == "','":
            self._scan("','")
            argument = self.Argument()
            return self.KommaArguments(v + [argument])
        else:
            raise SyntaxError(self._pos, 'Could not match KommaArguments')

    def Argument(self):
        """Argument ::= Expr."""
        return self.Expr()

    def UnionExpr(self):
        """UnionExpr ::= PathExpr UnionExprs."""
        path_expr = self.PathExpr()
        return self.UnionExprs(path_expr)

    def UnionExprs(self, v):
        """Tail of UnionExpr: ( '|' PathExpr )*."""
        tok = self._peek()
        if tok not in _UNION_LOOKAHEAD:
            return v
        elif tok == 'BAR':
            self._scan('BAR')
            path_expr = self.PathExpr()
            return self.UnionExprs(self.nop(self.UNION, v, path_expr))
        else:
            raise SyntaxError(self._pos, 'Could not match UnionExprs')

    def PathExpr(self):
        """PathExpr ::= LocationPath | FilterExpr PathExprRest."""
        tok = self._peek()
        if tok in ["'/'", "'//'", 'AxisName', 'NodeType', 'DOT', 'DOTDOT',
                   "'@'", 'STAR', 'QName', 'NCNameStar', 'NCName']:
            return self.LocationPath()
        elif tok in ['VariableReference', 'LPAREN', 'Literal', 'Number',
                     'FunctionName', 'ID', 'KEY']:
            filter_expr = self.FilterExpr()
            return self.PathExprRest(filter_expr)
        else:
            raise SyntaxError(self._pos, 'Could not match PathExpr')

    def PathExprRest(self, v):
        """Tail of PathExpr: ( ('/' | '//') RelativeLocationPath )?."""
        tok = self._peek()
        if tok not in _PATH_LOOKAHEAD:
            return v
        elif tok == "'/'":
            self._scan("'/'")
            relative_path = self.RelativeLocationPath()
            return self.pathExpr(v, relative_path)
        elif tok == "'//'":
            self._scan("'//'")
            relative_path = self.RelativeLocationPath()
            return self.abbreviatedPathExpr(v, relative_path)
        else:
            raise SyntaxError(self._pos, 'Could not match PathExprRest')

    def FilterExpr(self):
        """FilterExpr ::= PrimaryExpr FilterExprs."""
        primary = self.PrimaryExpr()
        return self.FilterExprs(primary)

    def FilterExprs(self, v):
        """Tail of FilterExpr: Predicate*; wraps ``v`` if any are present."""
        tok = self._peek()
        if tok not in _STEP_LOOKAHEAD:
            return v
        elif tok == 'LBRACKET':
            predicates = [self.Predicate()]
            while self._peek() == 'LBRACKET':
                predicates.append(self.Predicate())
            return self.filterExpr(v, predicates)
        else:
            raise SyntaxError(self._pos, 'Could not match FilterExprs')

    def OrExpr(self):
        """OrExpr ::= AndExpr OrExprs."""
        and_expr = self.AndExpr()
        return self.OrExprs(and_expr)

    def OrExprs(self, v):
        """Tail of OrExpr: ( 'or' AndExpr )*."""
        tok = self._peek()
        if tok == "'or'":
            self._scan("'or'")
            and_expr = self.AndExpr()
            return self.OrExprs(self.bop(self.OR, v, and_expr))
        elif tok in ['END', 'RPAREN', 'RBRACKET', "','"]:
            return v
        else:
            raise SyntaxError(self._pos, 'Could not match OrExprs')

    def AndExpr(self):
        """AndExpr ::= EqualityExpr AndExprs."""
        equality = self.EqualityExpr()
        return self.AndExprs(equality)

    def AndExprs(self, v):
        """Tail of AndExpr: ( 'and' EqualityExpr )*."""
        tok = self._peek()
        if tok == "'and'":
            self._scan("'and'")
            equality = self.EqualityExpr()
            return self.AndExprs(self.bop(self.AND, v, equality))
        elif tok in ["'or'", 'END', 'RPAREN', 'RBRACKET', "','"]:
            return v
        else:
            raise SyntaxError(self._pos, 'Could not match AndExprs')

    def EqualityExpr(self):
        """EqualityExpr ::= RelationalExpr EqualityExprs."""
        relational = self.RelationalExpr()
        return self.EqualityExprs(relational)

    def EqualityExprs(self, v):
        """Tail of EqualityExpr: ( ('=' | '!=') RelationalExpr )*."""
        tok = self._peek()
        if tok == "'='":
            self._scan("'='")
            relational = self.RelationalExpr()
            return self.EqualityExprs(self.bop(self.EQ, v, relational))
        elif tok == "'!='":
            self._scan("'!='")
            relational = self.RelationalExpr()
            return self.EqualityExprs(self.bop(self.NEQ, v, relational))
        elif tok in ["'and'", "'or'", 'END', 'RPAREN', 'RBRACKET', "','"]:
            return v
        else:
            raise SyntaxError(self._pos, 'Could not match EqualityExprs')

    def RelationalExpr(self):
        """RelationalExpr ::= AdditiveExpr RelationalExprs."""
        additive = self.AdditiveExpr()
        return self.RelationalExprs(additive)

    def RelationalExprs(self, v):
        """Tail of RelationalExpr: ( ('<'|'<='|'>'|'>=') AdditiveExpr )*."""
        tok = self._peek()
        if tok == "'<'":
            self._scan("'<'")
            additive = self.AdditiveExpr()
            return self.RelationalExprs(self.bop(self.LT, v, additive))
        elif tok == "'<='":
            self._scan("'<='")
            additive = self.AdditiveExpr()
            return self.RelationalExprs(self.bop(self.LE, v, additive))
        elif tok == "'>'":
            self._scan("'>'")
            additive = self.AdditiveExpr()
            return self.RelationalExprs(self.bop(self.GT, v, additive))
        elif tok == "'>='":
            self._scan("'>='")
            additive = self.AdditiveExpr()
            return self.RelationalExprs(self.bop(self.GE, v, additive))
        elif tok in ["'='", "'!='", "'and'", "'or'", 'END', 'RPAREN',
                     'RBRACKET', "','"]:
            return v
        else:
            raise SyntaxError(self._pos, 'Could not match RelationalExprs')

    def AdditiveExpr(self):
        """AdditiveExpr ::= MultiplicativeExpr AdditiveExprs."""
        multiplicative = self.MultiplicativeExpr()
        return self.AdditiveExprs(multiplicative)

    def AdditiveExprs(self, v):
        """Tail of AdditiveExpr: ( ('+' | '-') MultiplicativeExpr )*."""
        tok = self._peek()
        if tok == 'PLUS':
            self._scan('PLUS')
            multiplicative = self.MultiplicativeExpr()
            return self.AdditiveExprs(self.nop(self.PLUS, v, multiplicative))
        elif tok == "'-'":
            self._scan("'-'")
            multiplicative = self.MultiplicativeExpr()
            return self.AdditiveExprs(self.nop(self.MINUS, v, multiplicative))
        elif tok in ["'<'", "'<='", "'>'", "'>='", "'='", "'!='", "'and'",
                     "'or'", 'END', 'RPAREN', 'RBRACKET', "','"]:
            return v
        else:
            raise SyntaxError(self._pos, 'Could not match AdditiveExprs')

    def MultiplicativeExpr(self):
        """MultiplicativeExpr ::= UnaryExpr MultiplicativeExprs."""
        unary = self.UnaryExpr()
        return self.MultiplicativeExprs(unary)

    def MultiplicativeExprs(self, v):
        """Tail of MultiplicativeExpr: ( ('*'|'div'|'mod') UnaryExpr )*."""
        tok = self._peek()
        if tok == 'MultiplyOperator':
            self._scan('MultiplyOperator')
            unary = self.UnaryExpr()
            return self.MultiplicativeExprs(self.nop(self.TIMES, v, unary))
        elif tok == "'div'":
            self._scan("'div'")
            unary = self.UnaryExpr()
            return self.MultiplicativeExprs(self.nop(self.DIV, v, unary))
        elif tok == "'mod'":
            self._scan("'mod'")
            unary = self.UnaryExpr()
            return self.MultiplicativeExprs(self.nop(self.MOD, v, unary))
        elif tok not in _UNION_LOOKAHEAD:
            return v
        else:
            raise SyntaxError(self._pos,
                              'Could not match MultiplicativeExprs')

    def UnaryExpr(self):
        """UnaryExpr ::= '-' UnaryExpr | UnionExpr."""
        tok = self._peek()
        if tok == "'-'":
            self._scan("'-'")
            operand = self.UnaryExpr()
            return self.unaryExpr(operand)
        elif tok not in _UNARY_STOP:
            return self.UnionExpr()
        else:
            raise SyntaxError(self._pos, 'Could not match UnaryExpr')

    def FullPattern(self):
        """FullPattern ::= Pattern END."""
        pattern = self.Pattern()
        self._scan('END')
        return pattern

    def Pattern(self):
        """Pattern ::= LocationPathPattern ( '|' LocationPathPattern )*."""
        first = self.LocationPathPattern()
        p = self.pattern(first)
        while self._peek() == 'BAR':
            self._scan('BAR')
            # The pattern object collects further alternatives via append().
            p.append(self.LocationPathPattern())
        return p

    def LocationPathPattern(self):
        """LocationPathPattern ::= '/' OptRelativePathPattern
        | IdKeyPattern IdTail | RelativePathPattern
        | '//' RelativePathPattern."""
        tok = self._peek()
        if tok == "'/'":
            self._scan("'/'")
            opt_relative = self.OptRelativePathPattern()
            return self.locationPathPattern(None, 1, opt_relative)
        elif tok in ['ID', 'KEY']:
            id_key = self.IdKeyPattern()
            id_tail = self.IdTail()
            return self.locationPathPattern(id_key, id_tail[0], id_tail[1])
        elif tok in ['NodeType', "'@'", 'AxisName', 'STAR', 'QName',
                     'NCNameStar', 'NCName']:
            return self.RelativePathPattern()
        elif tok == "'//'":
            self._scan("'//'")
            relative = self.RelativePathPattern()
            return self.locationPathPattern(None, 0, relative)
        else:
            raise SyntaxError(self._pos,
                              'Could not match LocationPathPattern')

    def OptRelativePathPattern(self):
        """Return a RelativePathPattern, or None before '|' or END."""
        tok = self._peek()
        if tok in ['BAR', 'END']:
            return None
        elif tok in ['NodeType', "'@'", 'AxisName', 'STAR', 'QName',
                     'NCNameStar', 'NCName']:
            return self.RelativePathPattern()
        else:
            raise SyntaxError(self._pos,
                              'Could not match OptRelativePathPattern')

    def IdTail(self):
        """Parse what follows an id()/key() pattern.

        Returns a pair ``(flag, pattern)``: ``(0, None)`` when nothing
        follows, ``(1, p)`` after '/', ``(0, p)`` after '//'.
        """
        tok = self._peek()
        if tok in ['BAR', 'END']:
            return (0, None)
        elif tok == "'/'":
            self._scan("'/'")
            relative = self.RelativePathPattern()
            return (1, relative)
        elif tok == "'//'":
            self._scan("'//'")
            relative = self.RelativePathPattern()
            return (0, relative)
        else:
            raise SyntaxError(self._pos, 'Could not match IdTail')

    def IdKeyPattern(self):
        """IdKeyPattern ::= 'id' '(' Argument ')'
        | 'key' '(' Argument ',' Argument ')'."""
        tok = self._peek()
        if tok == 'ID':
            self._scan('ID')
            self._scan('LPAREN')
            argument = self.Argument()
            self._scan('RPAREN')
            return self.functionCall(None, "id", [argument])
        elif tok == 'KEY':
            self._scan('KEY')
            self._scan('LPAREN')
            a1 = self.Argument()
            self._scan("','")
            a2 = self.Argument()
            self._scan('RPAREN')
            return self.functionCall(None, "key", [a1, a2])
        else:
            raise SyntaxError(self._pos, 'Could not match IdKeyPattern')

    def RelativePathPattern(self):
        """RelativePathPattern ::= StepPattern ( ('/'|'//') StepPattern )*.

        The rpp action receives 1 for a '/' separator and 0 for '//'.
        """
        p = self.StepPattern()
        while self._peek() in ["'/'", "'//'"]:
            tok = self._peek()
            if tok == "'/'":
                self._scan("'/'")
                step_pattern = self.StepPattern()
                p = self.rpp(p, 1, step_pattern)
            elif tok == "'//'":
                self._scan("'//'")
                step_pattern = self.StepPattern()
                p = self.rpp(p, 0, step_pattern)
            else:
                raise SyntaxError(self._pos,
                                  'Could not match RelativePathPattern')
        return p

    def StepPattern(self):
        """StepPattern ::= ChildOrAttributeAxisSpecifier NodeTest Predicate*."""
        axis = self.ChildOrAttributeAxisSpecifier()
        node_test = self.NodeTest()
        pred = []
        while self._peek() == 'LBRACKET':
            pred.append(self.Predicate())
        return self.stepPattern(axis, node_test, pred)

    def ChildOrAttributeAxisSpecifier(self):
        """ChildOrAttributeAxisSpecifier ::= AbbreviatedAxisSpecifier
        | AxisName '::'."""
        tok = self._peek()
        if tok in ["'@'", 'NodeType', 'STAR', 'QName', 'NCNameStar',
                   'NCName']:
            return self.AbbreviatedAxisSpecifier()
        elif tok == 'AxisName':
            axis_name = self._scan('AxisName')
            self._scan("'::'")
            return self.axisSpecifier(self.anMap[axis_name])
        else:
            raise SyntaxError(self._pos,
                              'Could not match ChildOrAttributeAxisSpecifier')


def parse(rule, text, factory=None):
    """Parse ``text`` starting at grammar rule ``rule``.

    ``rule`` is the name of a parser method (e.g. 'FullExpr').  ``XPath``
    and ``XPathScanner`` are looked up when this function runs, so the
    redefinitions at the end of this file are the ones actually used.  The
    redefined ``XPath`` requires a factory; pass it as ``factory``.  When
    ``factory`` is None the parser is built with the scanner alone, exactly
    as the generated code did.

    NOTE(review): ``wrap_error_reporter`` comes from yappsrt; presumably it
    runs the rule and reports syntax errors -- confirm against yappsrt.
    """
    scanner = XPathScanner(text)
    if factory is None:
        P = XPath(scanner)
    else:
        P = XPath(scanner, factory)
    return wrap_error_reporter(P, rule)


# Reimplement scanner, to properly use disambiguation

import re
import sys

# Raw string: same value as before, without relying on the invalid "\w"
# string escape.
NCName = r"[a-zA-Z_](\w|[_.-])*"

# In this version of QName, the namespace prefix is not optional.
# As a result, QName matches iff there is a colon, NCName otherwise.
# All appearances of QName in the grammar then need to allow NCName
# as an alternative; currently, QName is used only once.
QName = NCName + ":" + NCName

# One alternative per token type; the group name is the token type handed
# to the parser.  Order matters: earlier alternatives win (QName before
# NCNameStar before NCName, DOTDOT before DOT, two-character operators
# before single-character ones).  Compiled with re.VERBOSE, so the layout
# whitespace between alternatives is ignored.
XPathExpr = """
  (?P<Literal>\"[^\"]*\"|\'[^\']*\')|
  (?P<Number>\\d+(\\.\\d*)?|\\.\\d+)|
  (?P<VariableReference>\\$""" + NCName + "(:" + NCName + """)?)|
  (?P<QName>""" + QName + """)|
  (?P<NCNameStar>""" + NCName + """:\\*)|
  (?P<NCName>""" + NCName + """)|
  (?P<LPAREN>\\()|
  (?P<RPAREN>\\))|
  (?P<STAR>\\*)|
  (?P<PLUS>\\+)|
  (?P<LBRACKET>\\[)|
  (?P<RBRACKET>\\])|
  (?P<DOTDOT>\\.\\.)|
  (?P<DOT>\\.)|
  (?P<BAR>\\|)|
  (?P<Operator>//|::|>=|<=|!=)|
  (?P<SingleOperator>[<>=,/@:-])|
  (?P<ExprWhiteSpace>[ \t\n\r]+)
"""

_xpath_exp = re.compile(XPathExpr, re.VERBOSE)

OperatorName = ['and', 'or', 'mod', 'div']

AxisName = ['ancestor', 'ancestor-or-self', 'attribute', 'child',
            'descendant', 'descendant-or-self', 'following',
            'following-sibling', 'namespace', 'parent', 'preceding',
            'preceding-sibling', 'self']

# Token types after which "*" is a name test and and/or/mod/div are plain
# names (XPath 1.0, section 3.7: "@, ::, (, [, , or an Operator").  The
# entries must be token *types* as produced by the scanner: "(", "[" and "+"
# are typed LPAREN, LBRACKET and PLUS, and the comma is typed "','".
SpecialPreceding = (
    ["'@'", "'::'", 'LPAREN', 'LBRACKET', "','"]
    + [repr(name) for name in OperatorName]
    + ["'/'", "'//'", 'PLUS', "'-'", "'='", "'!='",
       "'<'", "'<='", "'>'", "'>='"]
    + ['BAR', 'MultiplyOperator']
)

if sys.hexversion > 0x2000000:
    def _get_type(match):
        """Return ``(token_type, text)`` for a match of ``_xpath_exp``."""
        return match.lastgroup, match.group()
else:
    def _get_type(match):
        """Return ``(token_type, text)`` without relying on ``lastgroup``.

        Raises SyntaxError if more than one named group matched.
        """
        type = val = None
        for t, v in match.groupdict().items():
            if v is None:
                continue
            if val:
                # Report the position of the ambiguous token; the original
                # referenced an undefined name here.
                raise SyntaxError(match.start(),
                                  "ambiguity:%s could be %s or %s"
                                  % (val, type, t))
            type = t
            val = v
        return type, val


class XPathScanner:
    """Tokenizer applying the XPath lexical disambiguation rules.

    The whole input is tokenized in the constructor.  ``self.tokens`` is a
    list of ``(start, stop, type, value)`` tuples ending with an 'END'
    token.
    """

    def __init__(self, input):
        """Tokenize ``input``; raise SyntaxError(pos, msg) on a bad token."""
        self.tokens = tokens = []
        pos = 0
        # Process all tokens, advancing pos for each one
        while pos != len(input):
            m = _xpath_exp.match(input, pos)
            if not m:
                raise SyntaxError(pos, "Bad Token")
            kind, val = _get_type(m)
            if kind == "ExprWhiteSpace":
                # If we got white space, ignore it
                pos = pos + len(val)
                continue
            if kind in ['SingleOperator', 'Operator']:
                # Operators use their quoted text as token type, e.g. "'/'"
                kind = repr(str(val))
            start = pos
            pos = pos + len(val)
            tokens.append((start, pos, kind, val))
        # If we are at the end of the string, add END token
        tokens.append((pos, pos, 'END', ""))

        # Adjust token type according to additional semantic rules.  The
        # END token itself is skipped, so tokens[i + 1] always exists.
        for i in range(len(tokens) - 1):
            start, stop, kind, val = tokens[i]
            # If there is a preceding token and the preceding token is not
            # one of @, ::, (, [, , or an Operator
            if i >= 1 and tokens[i - 1][2] not in SpecialPreceding:
                if kind == 'STAR':
                    # then a * must be recognized as a MultiplyOperator
                    kind = 'MultiplyOperator'
                elif kind == 'NCName' and val in OperatorName:
                    # and an NCName must be recognized as an OperatorName
                    kind = repr(str(val))
            # If the character following an NCName (possibly after
            # intervening ExprWhitespace) is (
            if kind in ['QName', 'NCName'] and tokens[i + 1][2] == 'LPAREN':
                # then the token must be recognized as a NodeType or a
                # FunctionName
                if val in ['comment', 'text', 'processing-instruction',
                           'node']:
                    kind = 'NodeType'
                elif val == 'id':
                    kind = 'ID'
                elif val == 'key':
                    kind = 'KEY'
                else:
                    kind = 'FunctionName'
            # If the two characters following an NCName (possibly
            # after intervening ExprWhitespace) are ::
            if (kind == 'NCName' and tokens[i + 1][3] == '::'
                    and val in AxisName):
                # then the token must be recognized as an AxisName.
                kind = 'AxisName'
            tokens[i] = (start, stop, kind, val)

    def token(self, i, expected):
        """Return the i-th token tuple; ``expected`` is ignored."""
        return self.tokens[i]


# redefine to add additional attributes
import pyxpath
import string

GeneratedXPath = XPath


class XPath(GeneratedXPath):
    """Generated parser bound to a node factory.

    Semantic actions used by the grammar rules resolve either to the
    explicit shorthands set in ``__init__``, to the ``mk*`` helpers below,
    or -- through ``__getattr__`` -- to ``factory.create<Name>``.
    """

    OR = pyxpath.OR_OPERATOR
    AND = pyxpath.AND_OPERATOR
    EQ = pyxpath.EQ_OPERATOR
    NEQ = pyxpath.NEQ_OPERATOR
    LT = pyxpath.LT_OPERATOR
    GT = pyxpath.GT_OPERATOR
    LE = pyxpath.LE_OPERATOR
    GE = pyxpath.GE_OPERATOR
    PLUS = pyxpath.PLUS_OPERATOR
    MINUS = pyxpath.MINUS_OPERATOR
    TIMES = pyxpath.TIMES_OPERATOR
    DIV = pyxpath.DIV_OPERATOR
    MOD = pyxpath.MOD_OPERATOR
    UNION = pyxpath.UNION_OPERATOR

    def __init__(self, scanner, factory):
        """Create a parser reading from ``scanner`` and building nodes
        through ``factory``."""
        GeneratedXPath.__init__(self, scanner)
        self.factory = factory
        # shorthands
        self.rlp = self.factory.createRelativeLocationPath
        self.arlp = self.factory.createAbbreviatedRelativeLocationPath
        self.aalp = self.factory.createAbbreviatedAbsoluteLocationPath
        self.nop = self.factory.createNumericExpr
        self.bop = self.factory.createBooleanExpr
        self.rpp = self.factory.createRelativePathPattern

    def __getattr__(self, name):
        """Map a missing attribute ``fooBar`` to ``factory.createFooBar``.

        Raises AttributeError if the factory has no such attribute.
        """
        # Looking up self.factory below would re-enter __getattr__ forever
        # if the factory has not been assigned yet; fail cleanly instead.
        if not name or name == 'factory':
            raise AttributeError("parser has no attribute " + name)
        # convert
        newname = "create" + name[0].upper() + name[1:]
        try:
            return getattr(self.factory, newname)
        except AttributeError:
            raise AttributeError("parser has no attribute " + name)

    # Axis name as written in an expression -> pyxpath axis constant.
    anMap = {
        'ancestor': pyxpath.ANCESTOR_AXIS,
        'ancestor-or-self': pyxpath.ANCESTOR_OR_SELF_AXIS,
        'attribute': pyxpath.ATTRIBUTE_AXIS,
        'child': pyxpath.CHILD_AXIS,
        'descendant': pyxpath.DESCENDANT_AXIS,
        'descendant-or-self': pyxpath.DESCENDANT_OR_SELF_AXIS,
        'following': pyxpath.FOLLOWING_AXIS,
        'following-sibling': pyxpath.FOLLOWING_SIBLING_AXIS,
        'namespace': pyxpath.NAMESPACE_AXIS,
        'parent': pyxpath.PARENT_AXIS,
        'preceding': pyxpath.PRECEDING_AXIS,
        'preceding-sibling': pyxpath.PRECEDING_SIBLING_AXIS,
        'self': pyxpath.SELF_AXIS,
    }

    # Node type keyword -> pyxpath node test constant.
    nodeTestMap = {
        'node': pyxpath.NODE,
        'comment': pyxpath.COMMENT,
        'text': pyxpath.TEXT,
        'processing-instruction': pyxpath.PROCESSING_INSTRUCTION,
    }

    def mkNodeTest(self, op, val):
        """Build a node test for node type keyword ``op``.

        ``val`` is the optional literal argument; it is only accepted for
        processing-instruction, otherwise SyntaxError is raised.
        """
        type = self.nodeTestMap[op]
        if type != pyxpath.PROCESSING_INSTRUCTION and val is not None:
            # Use the (pos, msg) form like every other SyntaxError raised
            # in this file, so the message is not stored as the position.
            raise SyntaxError(self._pos, "parameter not allowed for " + op)
        return self.factory.createNodeTest(type, val)

    def mkQName(self, str):
        """Build a name test from a ``prefix:local`` token text."""
        prefix, local = str.split(":")
        return self.factory.createNameTest(prefix, local)

    def mkVariableReference(self, qname):
        """Build a variable reference from ``$name`` or ``$prefix:name``."""
        colon = qname.find(':')
        if colon == -1:
            # Skip the leading "$".
            return self.variableReference(None, qname[1:])
        return self.variableReference(qname[1:colon], qname[colon + 1:])

    def mkFunctionCall(self, qname, args):
        """Build a function call from ``name`` or ``prefix:name`` and the
        argument list ``args``."""
        colon = qname.find(':')
        if colon == -1:
            return self.functionCall(None, qname, args)
        return self.functionCall(qname[:colon], qname[colon + 1:], args)