diff options
author | Andrew Stitcher <astitcher@apache.org> | 2015-09-02 22:22:13 +0000 |
---|---|---|
committer | Andrew Stitcher <astitcher@apache.org> | 2015-09-02 22:22:13 +0000 |
commit | 492f7a78b8fd2ed7dd4e08184f42b7496aa8fed1 (patch) | |
tree | 16abbb56109aa97168555cbe0bb354aeede3b6b7 | |
parent | 5a416927a1afb25526a67fec59dce684e99a2dcb (diff) | |
download | qpid-python-492f7a78b8fd2ed7dd4e08184f42b7496aa8fed1.tar.gz |
QPID-6718: Complete parsing for all styles of numeric literals
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1700915 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | qpid/cpp/src/qpid/broker/SelectorExpression.cpp | 71 | ||||
-rw-r--r-- | qpid/cpp/src/qpid/broker/SelectorToken.cpp | 53 | ||||
-rw-r--r-- | qpid/cpp/src/tests/Selector.cpp | 38 |
3 files changed, 149 insertions, 13 deletions
diff --git a/qpid/cpp/src/qpid/broker/SelectorExpression.cpp b/qpid/cpp/src/qpid/broker/SelectorExpression.cpp index 2884dd967a..d882814443 100644 --- a/qpid/cpp/src/qpid/broker/SelectorExpression.cpp +++ b/qpid/cpp/src/qpid/broker/SelectorExpression.cpp @@ -27,6 +27,8 @@ #include "qpid/sys/IntegerTypes.h" #include "qpid/sys/regex.h" +#include <cstdlib> +#include <cerrno> #include <string> #include <memory> #include <ostream> @@ -45,6 +47,10 @@ * * Alpha ::= [a-zA-Z] * Digit ::= [0-9] + * HexDigit ::= [0-9a-fA-F] + * OctDigit ::= [0-7] + * BinDigit ::= [0-1] + * * IdentifierInitial ::= Alpha | "_" | "$" * IdentifierPart ::= IdentifierInitial | Digit | "." * Identifier ::= IdentifierInitial IdentifierPart* @@ -52,7 +58,10 @@ * * LiteralString ::= ("'" [^']* "'")+ // Repeats to cope with embedded single quote * - * LiteralExactNumeric ::= Digit+ + * // LiteralExactNumeric is a little simplified as it also allows underscores ("_") as internal separators and suffix "l" or "L" + * LiteralExactNumeric ::= "0x" HexDigit+ | "0X" HexDigit+ | "0b" BinDigit+ | "0B" BinDigit+ | "0" OctDigit* | Digit+ + * + * // LiteralApproxNumeric is a little simplified as it also allows suffix "d", "D", "f", "F" * Exponent ::= ('+'|'-')? LiteralExactNumeric * LiteralApproxNumeric ::= ( Digit "." Digit* ( "E" Exponent )? ) | * ( "." Digit+ ( "E" Exponent )? 
) | @@ -86,7 +95,8 @@ * * MultiplyExpression :: = UnaryArithExpression ( MultiplyOps UnaryArithExpression )* * - * UnaryArithExpression ::= AddOps AddExpression | + * UnaryArithExpression ::= "-" LiteralExactNumeric | // This is a special case to simplify negative ints + * AddOps AddExpression | * "(" OrExpression ")" | * PrimaryExpression * @@ -957,9 +967,17 @@ Expression* unaryArithExpression(Tokeniser& tokeniser) case T_PLUS: break; // Unary + is no op case T_MINUS: { - std::auto_ptr<Expression> e(unaryArithExpression(tokeniser)); - if (!e.get()) return 0; - return new UnaryArithExpression(&negate, e.release()); + const Token t = tokeniser.nextToken(); + // Special case for negative numerics + if (t.type==T_NUMERIC_EXACT) { + std::auto_ptr<Expression> e(parseExactNumeric(t, true)); + return e.release(); + } else { + tokeniser.returnTokens(); + std::auto_ptr<Expression> e(unaryArithExpression(tokeniser)); + if (!e.get()) return 0; + return new UnaryArithExpression(&negate, e.release()); + } } default: tokeniser.returnTokens(); @@ -970,7 +988,44 @@ Expression* unaryArithExpression(Tokeniser& tokeniser) return e.release(); } -Expression* primaryExpression(Tokeniser& tokeniser) +Expression* parseExactNumeric(const Token& token, bool negate) +{ + int base = 0; + string s; + std::remove_copy(token.val.begin(), token.val.end(), std::back_inserter(s), '_'); + if (s[1]=='b' || s[1]=='B') { + base = 2; + s = s.substr(2); + } else if (s[1]=='x' || s[1]=='X') { + base = 16; + s = s.substr(2); + } if (s[0]=='0') { + base = 8; + } + errno = 0; + uint64_t value = std::strtoull(s.c_str(), 0, base); + if (!errno && (base || value<=INT64_MAX)) { + return new Literal(static_cast<int64_t>(negate ? 
-value : value)); + } + if (negate && value==INT64_MAX+1ull) return new Literal(INT64_MIN); + error = "integer literal too big"; + return 0; +} + +Expression* parseApproxNumeric(const Token& token) +{ + errno = 0; + string s; + std::remove_copy(token.val.begin(), token.val.end(), std::back_inserter(s), '_'); + double value = std::strtod(s.c_str(), 0); + if (!errno) return new Literal(value); + error = "floating literal overflow/underflow"; + return 0; +} + +Expression* primaryExpression(Tokeniser& tokeniser + +) { const Token& t = tokeniser.nextToken(); switch (t.type) { @@ -983,9 +1038,9 @@ Expression* primaryExpression(Tokeniser& tokeniser) case T_TRUE: return new Literal(true); case T_NUMERIC_EXACT: - return new Literal(boost::lexical_cast<int64_t>(t.val)); + return parseExactNumeric(t, false); case T_NUMERIC_APPROX: - return new Literal(boost::lexical_cast<double>(t.val)); + return parseApproxNumeric(t); default: error = "expected literal or identifier"; return 0; diff --git a/qpid/cpp/src/qpid/broker/SelectorToken.cpp b/qpid/cpp/src/qpid/broker/SelectorToken.cpp index d69267b2e5..02d716dad7 100644 --- a/qpid/cpp/src/qpid/broker/SelectorToken.cpp +++ b/qpid/cpp/src/qpid/broker/SelectorToken.cpp @@ -30,7 +30,7 @@ namespace qpid { namespace broker { -// Tokeniserss always take string const_iterators to mark the beginning and end of the string being tokenised +// Tokenisers always take string const_iterators to mark the beginning and end of the string being tokenised // if the tokenise is successful then the start iterator is advanced, if the tokenise fails then the start // iterator is unchanged. 
@@ -149,7 +149,13 @@ bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, To START, REJECT, IDENTIFIER, + ZERO, DIGIT, + HEXDIGIT_START, + HEXDIGIT, + OCTDIGIT, + BINDIGIT_START, + BINDIGIT, DECIMAL_START, DECIMAL, EXPONENT_SIGN, @@ -193,6 +199,7 @@ bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, To if (isIdentifierStart(*t)) {++t; state = IDENTIFIER;} else if (*t=='\'') {return processString(s, e, '\'', T_STRING, tok);} else if (*t=='\"') {return processString(s, e, '\"', T_IDENTIFIER, tok);} + else if (*t=='0') {++t; state = ZERO;} else if (std::isdigit(*t)) {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL_START;} else state = REJECT; @@ -218,22 +225,62 @@ bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, To else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = REJECT; continue; + case ZERO: + if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + else if (*t=='.') {++t; state = DECIMAL;} + else if (*t=='x' || *t=='X') {++t; state = HEXDIGIT_START;} + else if (*t=='b' || *t=='B') {++t; state = BINDIGIT_START;} + else state = OCTDIGIT; + continue; + case HEXDIGIT_START: + if (t==e) {state = REJECT;} + else if (std::isxdigit(*t)) {++t; state = HEXDIGIT;} + else state = REJECT; + continue; + case HEXDIGIT: + if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;} + else if (std::isxdigit(*t) || *t=='_') {++t; state = HEXDIGIT;} + else if (*t=='p' || *t=='P') {++t; state = EXPONENT_SIGN;} + else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + continue; + case BINDIGIT_START: + if (t==e) {state = REJECT;} + else if (*t=='0' || *t=='1') {++t; state = BINDIGIT;} + else state = REJECT; + continue; + case BINDIGIT: + if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;} + else if 
(*t=='0' || *t=='1' || *t=='_') {++t; state = BINDIGIT;} + else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + continue; + case OCTDIGIT: + if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;} + else if ((std::isdigit(*t) && *t<'8') || *t=='_') {++t; state = OCTDIGIT;} + else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + continue; case DIGIT: if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} - else if (std::isdigit(*t)) {++t; state = DIGIT;} + else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;} + else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;} + else if (std::isdigit(*t) || *t=='_') {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} continue; case DECIMAL: if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} - else if (std::isdigit(*t)) {++t; state = DECIMAL;} + else if (std::isdigit(*t) || *t=='_') {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} + else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;} else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} continue; case EXPONENT: if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} + else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;} else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} continue; case ACCEPT_INC: diff --git a/qpid/cpp/src/tests/Selector.cpp b/qpid/cpp/src/tests/Selector.cpp index 7d6dbf0010..d512476f8b 100644 --- a/qpid/cpp/src/tests/Selector.cpp +++ b/qpid/cpp/src/tests/Selector.cpp @@ -166,7 +166,8 @@ QPID_AUTO_TEST_CASE(tokeniseSuccess) verifyTokeniserSuccess(&tokenise, "<> 
Identifier", qb::T_NEQ, "<>", " Identifier"); verifyTokeniserSuccess(&tokenise, "(a and b) not c", qb::T_LPAREN, "(", "a and b) not c"); verifyTokeniserSuccess(&tokenise, ") not c", qb::T_RPAREN, ")", " not c"); - verifyTokeniserSuccess(&tokenise, "019kill", qb::T_NUMERIC_EXACT, "019", "kill"); + verifyTokeniserSuccess(&tokenise, "017kill", qb::T_NUMERIC_EXACT, "017", "kill"); + verifyTokeniserSuccess(&tokenise, "019kill", qb::T_NUMERIC_EXACT, "01", "9kill"); verifyTokeniserSuccess(&tokenise, "0kill", qb::T_NUMERIC_EXACT, "0", "kill"); verifyTokeniserSuccess(&tokenise, "0.kill", qb::T_NUMERIC_APPROX, "0.", "kill"); verifyTokeniserSuccess(&tokenise, "3.1415=pi", qb::T_NUMERIC_APPROX, "3.1415", "=pi"); @@ -174,7 +175,15 @@ QPID_AUTO_TEST_CASE(tokeniseSuccess) verifyTokeniserSuccess(&tokenise, "2e5.kill", qb::T_NUMERIC_APPROX, "2e5", ".kill"); verifyTokeniserSuccess(&tokenise, "3.e50easy to kill", qb::T_NUMERIC_APPROX, "3.e50", "easy to kill"); verifyTokeniserSuccess(&tokenise, "34.25e+50easy to kill", qb::T_NUMERIC_APPROX, "34.25e+50", "easy to kill"); - verifyTokeniserSuccess(&tokenise, "34.e-50easy to kill", qb::T_NUMERIC_APPROX, "34.e-50", "easy to kill"); + verifyTokeniserSuccess(&tokenise, "34de", qb::T_NUMERIC_APPROX, "34d", "e"); + verifyTokeniserSuccess(&tokenise, "34fuller", qb::T_NUMERIC_APPROX, "34f", "uller"); + verifyTokeniserSuccess(&tokenise, "34Longer", qb::T_NUMERIC_EXACT, "34L", "onger"); + verifyTokeniserSuccess(&tokenise, "34littler", qb::T_NUMERIC_EXACT, "34l", "ittler"); + verifyTokeniserSuccess(&tokenise, "034Longer", qb::T_NUMERIC_EXACT, "034L", "onger"); + verifyTokeniserSuccess(&tokenise, "034littler", qb::T_NUMERIC_EXACT, "034l", "ittler"); + verifyTokeniserSuccess(&tokenise, "0X34littler", qb::T_NUMERIC_EXACT, "0X34l", "ittler"); + verifyTokeniserSuccess(&tokenise, "0X3456_fffflittler", qb::T_NUMERIC_EXACT, "0X3456_ffffl", "ittler"); + verifyTokeniserSuccess(&tokenise, "0xdead_beafittler", qb::T_NUMERIC_EXACT, "0xdead_beaf", "ittler"); } 
QPID_AUTO_TEST_CASE(tokeniseFailure) @@ -201,6 +210,8 @@ QPID_AUTO_TEST_CASE(tokeniseFailure) verifyTokeniserFail(&tokeniseNumeric, "34e"); verifyTokeniserFail(&tokeniseNumeric, ".3e+"); verifyTokeniserFail(&tokeniseNumeric, ".3e-."); + verifyTokeniserFail(&tokenise, "0b34Longer"); + verifyTokeniserFail(&tokenise, "0X_34Longer"); } QPID_AUTO_TEST_CASE(tokenString) @@ -404,6 +415,29 @@ QPID_AUTO_TEST_CASE(numericEval) BOOST_CHECK(qb::Selector("-A=0-A").eval(env)); } +QPID_AUTO_TEST_CASE(numericLiterals) +{ + TestSelectorEnv env; + + BOOST_CHECK(qb::Selector(" 9223372036854775807>0").eval(env)); + BOOST_CHECK(qb::Selector("-9223372036854775807<0").eval(env)); + BOOST_CHECK_THROW(qb::Selector(" 9223372036854775808>0").eval(env), std::range_error); + BOOST_CHECK(qb::Selector("0x8000_0000_0000_0001=-9223372036854775807").eval(env)); + BOOST_CHECK_THROW(qb::Selector("-9223372036854775809<0").eval(env), std::range_error); + BOOST_CHECK(qb::Selector(" 9223372036854775807L<>0").eval(env)); + BOOST_CHECK(qb::Selector("-9223372036854775807L<>0").eval(env)); + BOOST_CHECK(qb::Selector("-9223372036854775808<>0").eval(env)); + BOOST_CHECK(qb::Selector("-9223372036854775808=0x8000_0000_0000_0000").eval(env)); + BOOST_CHECK(qb::Selector("0x8000_0000_0000_0000<9223372036854775807").eval(env)); + BOOST_CHECK(qb::Selector(" 0.4f>0.3d").eval(env)); + BOOST_CHECK(qb::Selector(" 1000_020.4f>0.3d").eval(env)); + BOOST_CHECK(qb::Selector(" 1000_020.4f>0x800p-3").eval(env)); + BOOST_CHECK(qb::Selector(" 0x1000_0000=0x1000_0000p0").eval(env)); + BOOST_CHECK(qb::Selector(" 0xFF=255L").eval(env)); + BOOST_CHECK(qb::Selector(" 077L=0b111_111").eval(env)); + BOOST_CHECK(qb::Selector(" 077L=63").eval(env)); +} + QPID_AUTO_TEST_CASE(comparisonEval) { TestSelectorEnv env; |