/* * Copyright 2005 Maksim Orlovich * Copyright (C) 2006, 2013 Apple Inc. All rights reserved. * Copyright (C) 2007 Alexey Proskuryakov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "XPathParser.h" #include "ExceptionCode.h" #include "XPathEvaluator.h" #include "XPathException.h" #include "XPathNSResolver.h" #include "XPathPath.h" #include #include #include using namespace WebCore; using namespace XPath; extern int xpathyyparse(Parser&); #include "XPathGrammar.h" namespace WebCore { namespace XPath { struct Parser::Token { int type; String string; Step::Axis axis; NumericOp::Opcode numericOpcode; EqTestOp::Opcode equalityTestOpcode; Token(int type) : type(type) { } Token(int type, const String& string) : type(type), string(string) { } Token(int type, Step::Axis axis) : type(type), axis(axis) { } Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { } Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { } }; enum XMLCat { NameStart, NameCont, NotPartOfName }; static XMLCat charCat(UChar character) { if (character == '_') return NameStart; if (character == '.' || character == '-') return NameCont; unsigned characterTypeMask = U_GET_GC_MASK(character); if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK)) return NameStart; if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK)) return NameCont; return NotPartOfName; } static void populateAxisNamesMap(HashMap& axisNames) { struct AxisName { const char* name; Step::Axis axis; }; const AxisName axisNameList[] = { { "ancestor", Step::AncestorAxis }, { "ancestor-or-self", Step::AncestorOrSelfAxis }, { "attribute", Step::AttributeAxis }, { "child", Step::ChildAxis }, { "descendant", Step::DescendantAxis }, { "descendant-or-self", Step::DescendantOrSelfAxis }, { "following", Step::FollowingAxis }, { "following-sibling", Step::FollowingSiblingAxis }, { "namespace", Step::NamespaceAxis }, { "parent", Step::ParentAxis }, { "preceding", Step::PrecedingAxis }, { "preceding-sibling", Step::PrecedingSiblingAxis }, { "self", Step::SelfAxis } }; for (auto& axisName : axisNameList) axisNames.add(axisName.name, axisName.axis); } static bool parseAxisName(const String& name, Step::Axis& type) { static NeverDestroyed> axisNames; if (axisNames.get().isEmpty()) populateAxisNamesMap(axisNames); auto it = axisNames.get().find(name); if (it == axisNames.get().end()) return false; type = it->value; return true; } // Returns whether the current token can possibly be a binary operator, given // the previous token. Necessary to disambiguate some of the operators // (* (multiply), div, and, or, mod) in the [32] Operator rule // (check http://www.w3.org/TR/xpath#exprlex). bool Parser::isBinaryOperatorContext() const { switch (m_lastTokenType) { case 0: case '@': case AXISNAME: case '(': case '[': case ',': case AND: case OR: case MULOP: case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: case EQOP: case RELOP: return false; default: return true; } } void Parser::skipWS() { while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) ++m_nextPos; } Parser::Token Parser::makeTokenAndAdvance(int code, int advance) { m_nextPos += advance; return Token(code); } Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) { m_nextPos += advance; return Token(code, val); } Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) { m_nextPos += advance; return Token(code, val); } // Returns next char if it's there and interesting, 0 otherwise. char Parser::peekAheadHelper() { if (m_nextPos + 1 >= m_data.length()) return 0; UChar next = m_data[m_nextPos + 1]; if (next >= 0xff) return 0; return next; } char Parser::peekCurHelper() { if (m_nextPos >= m_data.length()) return 0; UChar next = m_data[m_nextPos]; if (next >= 0xff) return 0; return next; } Parser::Token Parser::lexString() { UChar delimiter = m_data[m_nextPos]; int startPos = m_nextPos + 1; for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { if (m_data[m_nextPos] == delimiter) { String value = m_data.substring(startPos, m_nextPos - startPos); if (value.isNull()) value = ""; ++m_nextPos; // Consume the char. return Token(LITERAL, value); } } // Ouch, went off the end -- report error. return Token(XPATH_ERROR); } Parser::Token Parser::lexNumber() { int startPos = m_nextPos; bool seenDot = false; // Go until end or a non-digits character. for (; m_nextPos < m_data.length(); ++m_nextPos) { UChar aChar = m_data[m_nextPos]; if (aChar >= 0xff) break; if (aChar < '0' || aChar > '9') { if (aChar == '.' && !seenDot) seenDot = true; else break; } } return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); } bool Parser::lexNCName(String& name) { int startPos = m_nextPos; if (m_nextPos >= m_data.length()) return false; if (charCat(m_data[m_nextPos]) != NameStart) return false; // Keep going until we get a character that's not good for names. for (; m_nextPos < m_data.length(); ++m_nextPos) if (charCat(m_data[m_nextPos]) == NotPartOfName) break; name = m_data.substring(startPos, m_nextPos - startPos); return true; } bool Parser::lexQName(String& name) { String n1; if (!lexNCName(n1)) return false; skipWS(); // If the next character is :, what we just got it the prefix, if not, // it's the whole thing. if (peekAheadHelper() != ':') { name = n1; return true; } String n2; if (!lexNCName(n2)) return false; name = n1 + ":" + n2; return true; } inline Parser::Token Parser::nextTokenInternal() { skipWS(); if (m_nextPos >= m_data.length()) return Token(0); char code = peekCurHelper(); switch (code) { case '(': case ')': case '[': case ']': case '@': case ',': case '|': return makeTokenAndAdvance(code); case '\'': case '\"': return lexString(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return lexNumber(); case '.': { char next = peekAheadHelper(); if (next == '.') return makeTokenAndAdvance(DOTDOT, 2); if (next >= '0' && next <= '9') return lexNumber(); return makeTokenAndAdvance('.'); } case '/': if (peekAheadHelper() == '/') return makeTokenAndAdvance(SLASHSLASH, 2); return makeTokenAndAdvance('/'); case '+': return makeTokenAndAdvance(PLUS); case '-': return makeTokenAndAdvance(MINUS); case '=': return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); case '!': if (peekAheadHelper() == '=') return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); return Token(XPATH_ERROR); case '<': if (peekAheadHelper() == '=') return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); case '>': if (peekAheadHelper() == '=') return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); case '*': if (isBinaryOperatorContext()) return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); ++m_nextPos; return Token(NAMETEST, "*"); case '$': { // $ QName m_nextPos++; String name; if (!lexQName(name)) return Token(XPATH_ERROR); return Token(VARIABLEREFERENCE, name); } } String name; if (!lexNCName(name)) return Token(XPATH_ERROR); skipWS(); // If we're in an operator context, check for any operator names if (isBinaryOperatorContext()) { if (name == "and") //### hash? return Token(AND); if (name == "or") return Token(OR); if (name == "mod") return Token(MULOP, NumericOp::OP_Mod); if (name == "div") return Token(MULOP, NumericOp::OP_Div); } // See whether we are at a : if (peekCurHelper() == ':') { m_nextPos++; // Any chance it's an axis name? if (peekCurHelper() == ':') { m_nextPos++; //It might be an axis name. Step::Axis axis; if (parseAxisName(name, axis)) return Token(AXISNAME, axis); // Ugh, :: is only valid in axis names -> error return Token(XPATH_ERROR); } // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest skipWS(); if (peekCurHelper() == '*') { m_nextPos++; return Token(NAMETEST, name + ":*"); } // Make a full qname. String n2; if (!lexNCName(n2)) return Token(XPATH_ERROR); name = name + ":" + n2; } skipWS(); if (peekCurHelper() == '(') { // note: we don't swallow the '(' here! // Either node type oor function name. if (name == "processing-instruction") return Token(PI); if (name == "node") return Token(NODE); if (name == "text") return Token(TEXT); if (name == "comment") return Token(COMMENT); return Token(FUNCTIONNAME, name); } // At this point, it must be NAMETEST. return Token(NAMETEST, name); } inline Parser::Token Parser::nextToken() { Token token = nextTokenInternal(); m_lastTokenType = token.type; return token; } Parser::Parser(const String& statement, XPathNSResolver* resolver) : m_data(statement) , m_resolver(resolver) , m_nextPos(0) , m_lastTokenType(0) , m_sawNamespaceError(false) { } int Parser::lex(YYSTYPE& yylval) { Token token = nextToken(); switch (token.type) { case AXISNAME: yylval.axis = token.axis; break; case MULOP: yylval.numericOpcode = token.numericOpcode; break; case RELOP: case EQOP: yylval.equalityTestOpcode = token.equalityTestOpcode; break; case NODETYPE: case FUNCTIONNAME: case LITERAL: case VARIABLEREFERENCE: case NUMBER: case NAMETEST: yylval.string = token.string.releaseImpl().leakRef(); break; } return token.type; } bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI) { size_t colon = qualifiedName.find(':'); if (colon != notFound) { if (!m_resolver) { m_sawNamespaceError = true; return false; } namespaceURI = m_resolver->lookupNamespaceURI(qualifiedName.left(colon)); if (namespaceURI.isNull()) { m_sawNamespaceError = true; return false; } localName = qualifiedName.substring(colon + 1); } else localName = qualifiedName; return true; } std::unique_ptr Parser::parseStatement(const String& statement, XPathNSResolver* resolver, ExceptionCode& ec) { Parser parser(statement, resolver); int parseError = xpathyyparse(parser); if (parser.m_sawNamespaceError) { ec = NAMESPACE_ERR; return nullptr; } if (parseError) { ec = XPathException::INVALID_EXPRESSION_ERR; return nullptr; } return WTFMove(parser.m_result); } } }