// Copyright (C) 2016 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include "../cplusplus_global.h" #include #include #include #include //#define DEBUG_TOKENS typedef QList TokenKindList; typedef QByteArray _; Q_DECLARE_METATYPE(TokenKindList) Q_DECLARE_METATYPE(CPlusPlus::Tokens) Q_DECLARE_METATYPE(CPlusPlus::Kind) //TESTED_COMPONENT=src/libs/cplusplus using namespace CPlusPlus; class tst_SimpleLexer: public QObject { Q_OBJECT public: tst_SimpleLexer() : _state(0) {} enum TokenCompareFlag { CompareKind = 1 << 1, CompareBytes = 1 << 2, CompareBytesBegin = 1 << 3, CompareBytesEnd = 1 << 4, CompareUtf16Chars = 1 << 5, CompareUtf16CharsBegin = 1 << 6, CompareUtf16CharsEnd = 1 << 7, CompareUserDefinedLiteral = 1 << 8 }; Q_DECLARE_FLAGS(TokenCompareFlags, TokenCompareFlag) private slots: void basic(); void basic_data(); void cxx20(); void incremental(); void incremental_data(); void literals(); void literals_data(); void preprocessor(); void preprocessor_data(); void ppOpOrPunc(); void ppOpOrPunc_data(); void digraph(); void digraph_data(); void trigraph(); void trigraph_data(); void bytes_and_utf16chars(); void bytes_and_utf16chars_data(); void user_defined_literals(); void user_defined_literals_data(); void offsets(); void offsets_data(); private: static Tokens toTokens(const TokenKindList &tokenKinds); void run(const QByteArray &source, const Tokens &expectedTokens, bool preserveState, TokenCompareFlags compareFlags, bool preprocessorMode = false, const LanguageFeatures &extraLanguageFeatures = LanguageFeatures()); int _state; }; Q_DECLARE_OPERATORS_FOR_FLAGS(tst_SimpleLexer::TokenCompareFlags) Tokens tst_SimpleLexer::toTokens(const TokenKindList &tokenKinds) { Tokens tokens; for (unsigned tokenKind : tokenKinds) { Token token; token.f.kind = tokenKind; tokens << token; } return tokens; } void tst_SimpleLexer::run(const QByteArray &source, const Tokens &expectedTokens, bool preserveState, TokenCompareFlags compareFlags, bool preprocessorMode, const LanguageFeatures &extraLanguageFeatures) { QVERIFY(compareFlags); SimpleLexer lexer; lexer.setPreprocessorMode(preprocessorMode); if (extraLanguageFeatures.flags) { LanguageFeatures languageFeatures = lexer.languageFeatures(); languageFeatures.flags |= extraLanguageFeatures.flags; lexer.setLanguageFeatures(languageFeatures); } const Tokens tokens = lexer(QString::fromUtf8(source), preserveState ? _state : 0); if (preserveState) _state = lexer.state(); int i = 0; for (; i < tokens.size(); ++i) { QVERIFY2(i < expectedTokens.size(), "More tokens than expected."); const Token token = tokens.at(i); const Token expectedToken = expectedTokens.at(i); #ifdef DEBUG_TOKENS qDebug("Comparing (i=%d): \"%s\" \"%s\"", i, Token::name(token.kind()), Token::name(expectedToken.kind())); #endif if (compareFlags & CompareKind) QCOMPARE(token.kind(), expectedToken.kind()); if (compareFlags & CompareBytes) QCOMPARE(token.bytes(), expectedToken.bytes()); if (compareFlags & CompareBytesBegin) QCOMPARE(token.bytesBegin(), expectedToken.bytesBegin()); if (compareFlags & CompareBytesEnd) QCOMPARE(token.bytesEnd(), expectedToken.bytesEnd()); if (compareFlags & CompareUtf16Chars) QCOMPARE(token.utf16chars(), expectedToken.utf16chars()); if (compareFlags & CompareUtf16CharsBegin) QCOMPARE(token.utf16charsBegin(), expectedToken.utf16charsBegin()); if (compareFlags & CompareUtf16CharsEnd) QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd()); if (compareFlags & CompareUserDefinedLiteral) QCOMPARE(token.userDefinedLiteral(), expectedToken.userDefinedLiteral()); } QString msg = QLatin1String("Less tokens than expected: got %1, expected %2."); msg = msg.arg(i).arg(expectedTokens.size()); QVERIFY2(i == expectedTokens.size(), msg.toUtf8().constData()); } void tst_SimpleLexer::basic() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); run(source, toTokens(expectedTokenKindList), false, CompareKind); } void tst_SimpleLexer::basic_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QByteArray source; TokenKindList expectedTokenKindList; source = "// comment"; expectedTokenKindList = TokenKindList() << T_CPP_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "//// comment"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/// comment"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "///< comment"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "//! comment"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "//!< comment"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "///\n"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "///\n" "int i;"; expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON; QTest::newRow(source) << source << expectedTokenKindList; source = "/* comment */\n"; expectedTokenKindList = TokenKindList() << T_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/* comment\n" " comment\n" " */\n"; expectedTokenKindList = TokenKindList() << T_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/** comment */"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/** comment */\n"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/** comment */ int i;\n"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON; QTest::newRow(source) << source << expectedTokenKindList; source = "/**\n" " * comment\n" " */\n"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/*!\n" " * comment\n" " */\n"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "/*!\n" " comment\n" "*/\n"; expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; source = "int i; /*!< first counter */\n" "int j; /**< second counter */\n" "int k; ///< third counter\n" "int l; //!< fourth counter\n" " //!< more details... "; expectedTokenKindList = TokenKindList() << T_INT << T_IDENTIFIER << T_SEMICOLON << T_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON << T_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_CPP_DOXY_COMMENT; QTest::newRow(source) << source << expectedTokenKindList; } void tst_SimpleLexer::cxx20() { LanguageFeatures features; features.cxxEnabled = features.cxx11Enabled = features.cxx14Enabled = features.cxx20Enabled = true; const QString source = R"( template concept IsPointer = requires(T p) { *p; }; SomeType coroutine() { constinit const char8_t = 'c'; if consteval {} else {} co_await std::suspend_always{}; co_yield 1; co_return; } )"; const TokenKindList expectedTokens = { T_TEMPLATE, T_LESS, T_TYPENAME, T_IDENTIFIER, T_GREATER, T_CONCEPT, T_IDENTIFIER, T_EQUAL, T_REQUIRES, T_LPAREN, T_IDENTIFIER, T_IDENTIFIER, T_RPAREN, T_LBRACE, T_STAR, T_IDENTIFIER, T_SEMICOLON, T_RBRACE, T_SEMICOLON, T_IDENTIFIER, T_IDENTIFIER, T_LPAREN, T_RPAREN, T_LBRACE, T_CONSTINIT, T_CONST, T_CHAR8_T, T_EQUAL, T_CHAR_LITERAL, T_SEMICOLON, T_IF, T_CONSTEVAL, T_LBRACE, T_RBRACE, T_ELSE, T_LBRACE, T_RBRACE, T_CO_AWAIT, T_IDENTIFIER, T_COLON_COLON, T_IDENTIFIER, T_LBRACE, T_RBRACE, T_SEMICOLON, T_CO_YIELD, T_NUMERIC_LITERAL, T_SEMICOLON, T_CO_RETURN, T_SEMICOLON, T_RBRACE }; run(source.toUtf8(), toTokens(expectedTokens), false, CompareKind, false, features); } void tst_SimpleLexer::literals() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); LanguageFeatures features; features.cxx14Enabled = true; run(source, toTokens(expectedTokenKindList), false, CompareKind, false, features); } void tst_SimpleLexer::literals_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QByteArray source; TokenKindList expectedTokenKindList; source = "1.\n" "1.1\n" "1.23456789\n" ".1\n" ".3e8\n" ".3e8f\n" "1e1\n" "1E1\n" "-1e-1\n" // the first minus sign is a separate token! "1e-1\n" "1e+1\n" "1e1L\n" "1e1l\n" "1e1f\n" "1e1F\n" "23.45x" ".45x" ; expectedTokenKindList = TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_MINUS << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_ERROR << T_ERROR ; QTest::newRow("float-literals") << source << expectedTokenKindList; source = // these are all the same "42\n" "0b101010u\n" "0b101'010u\n" "052ll\n" "0'5'2ll\n" "0x2aL\n" "0x2'aL\n" "123FOO\n" "0xfOo\n" "33_\n" ; expectedTokenKindList = TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR ; QTest::newRow("integer-literals") << source << expectedTokenKindList; source = "42ui64\n" "43UI64\n" "44Ui64\n" "45uI64\n" "46i64\n" "47I64\n" "0xffffui64\n" "0xfffeUi64\n" "0xfffdi64\n" "56ui\n" // incomplete "56ui6\n" "57ui67\n" // wrong "58i67\n" ; expectedTokenKindList = TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_ERROR << T_ERROR ; QTest::newRow("microsoft-suffix") << source << expectedTokenKindList; source = "42u\n" "43U\n" "44ul\n" "45UL\n" "46uL\n" "47Ul\n" "48ull\n" "49ULL\n" "50uLL\n" "51Ull\n" "52l\n" "53L\n" "54ll\n" "55LL\n" "56lu\n" "57LU\n" "58lU\n" "59Lu\n" "60llu\n" "61llU\n" "62LLU\n" "63LLu\n" "64lL\n" // wrong "65Ll\n" // wrong "66luu\n" // wrong "67LUU\n" // wrong "68lul\n" // wrong "69LUL\n" // wrong ; expectedTokenKindList = TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR << T_ERROR << T_ERROR << T_ERROR ; QTest::newRow("integer-suffix") << source << expectedTokenKindList; source = "R\"(raw text)\"\n" "R\"delimiter(raw text)delimiter\"\n" "R\"delimiter(\nraw text line1\nraw text line2\n)delimiter\"\n" ; expectedTokenKindList = TokenKindList() << T_RAW_STRING_LITERAL << T_RAW_STRING_LITERAL << T_RAW_STRING_LITERAL ; QTest::newRow("raw-string-literals") << source << expectedTokenKindList; source = "R\"\\" ; expectedTokenKindList = TokenKindList() << T_ERROR; QTest::newRow("invalid-raw-string-literals1") << source << expectedTokenKindList; source = "R\")" ; expectedTokenKindList = TokenKindList() << T_ERROR; QTest::newRow("invalid-raw-string-literals2") << source << expectedTokenKindList; } void tst_SimpleLexer::preprocessor() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); run(source, toTokens(expectedTokenKindList), false, CompareKind, true); } void tst_SimpleLexer::preprocessor_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QByteArray source; TokenKindList expectedTokenKindList; source = // sad but true [2.10] "1\n" "1x.\n" "1.y\n" ".1_1.1.\n" "1e-\n" "01x1b2qWeRtty_Grumble+E-.\n" ; expectedTokenKindList = TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL; QTest::newRow("pp-number") << source << expectedTokenKindList; } void tst_SimpleLexer::ppOpOrPunc() { QFETCH(Kind, expectedTokenKind); const QByteArray source = QTest::currentDataTag(); LanguageFeatures languageFeatures; languageFeatures.cxxEnabled = true; languageFeatures.cxx20Enabled = true; run(source, toTokens({unsigned(expectedTokenKind)}), false, CompareKind, true, languageFeatures); } void tst_SimpleLexer::ppOpOrPunc_data() { QTest::addColumn("expectedTokenKind"); // N4296 - [2.12] QTest::newRow("{") << T_LBRACE; QTest::newRow("}") << T_RBRACE; QTest::newRow("[") << T_LBRACKET; QTest::newRow("]") << T_RBRACKET; QTest::newRow("#") << T_POUND; QTest::newRow("##") << T_POUND_POUND; QTest::newRow("(") << T_LPAREN; QTest::newRow(")") << T_RPAREN; QTest::newRow("<:") << T_LBRACKET; QTest::newRow(":>") << T_RBRACKET; QTest::newRow("<%") << T_LBRACE; QTest::newRow("%>") << T_RBRACE; QTest::newRow("%:") << T_POUND; QTest::newRow("%:%:") << T_POUND_POUND; QTest::newRow(";") << T_SEMICOLON; QTest::newRow(":") << T_COLON; QTest::newRow("...") << T_DOT_DOT_DOT; QTest::newRow("new") << T_NEW; QTest::newRow("delete") << T_DELETE; QTest::newRow("?") << T_QUESTION; QTest::newRow("::") << T_COLON_COLON; QTest::newRow(".") << T_DOT; QTest::newRow(".*") << T_DOT_STAR; QTest::newRow("+") << T_PLUS; QTest::newRow("-") << T_MINUS; QTest::newRow("*") << T_STAR; QTest::newRow("/") << T_SLASH; QTest::newRow("%") << T_PERCENT; QTest::newRow("^") << T_CARET; QTest::newRow("&") << T_AMPER; QTest::newRow("|") << T_PIPE; QTest::newRow("~") << T_TILDE; QTest::newRow("^=") << T_CARET_EQUAL; QTest::newRow("&=") << T_AMPER_EQUAL; QTest::newRow("|=") << T_PIPE_EQUAL; QTest::newRow("<<") << T_LESS_LESS; QTest::newRow(">>") << T_GREATER_GREATER; QTest::newRow(">>=") << T_GREATER_GREATER_EQUAL; QTest::newRow("<<=") << T_LESS_LESS_EQUAL; QTest::newRow("==") << T_EQUAL_EQUAL; QTest::newRow("!=") << T_EXCLAIM_EQUAL; QTest::newRow("<=") << T_LESS_EQUAL; QTest::newRow("<=>") << T_LESS_EQUAL_GREATER; QTest::newRow(">=") << T_GREATER_EQUAL; QTest::newRow("&&") << T_AMPER_AMPER; QTest::newRow("||") << T_PIPE_PIPE; QTest::newRow("++") << T_PLUS_PLUS; QTest::newRow("--") << T_MINUS_MINUS; QTest::newRow(",") << T_COMMA; QTest::newRow("->*") << T_ARROW_STAR; QTest::newRow("->") << T_ARROW; QTest::newRow("and") << T_AND; QTest::newRow("and_eq") << T_AND_EQ; QTest::newRow("bitand") << T_BITAND; QTest::newRow("bitor") << T_BITOR; QTest::newRow("compl") << T_COMPL; QTest::newRow("not") << T_NOT; QTest::newRow("not_eq") << T_NOT_EQ; QTest::newRow("or") << T_OR; QTest::newRow("or_eq") << T_OR_EQ; QTest::newRow("xor") << T_XOR; QTest::newRow("xor_eq") << T_XOR_EQ; } void tst_SimpleLexer::bytes_and_utf16chars() { QFETCH(QByteArray, source); QFETCH(Tokens, expectedTokens); const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars; run(source, expectedTokens, false, compareFlags); } static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars, bool userDefinedLiteral = false) { Token t; t.f.kind = kind; t.f.bytes = bytes; t.f.utf16chars = utf16chars; t.f.userDefinedLiteral = userDefinedLiteral; return Tokens() << t; } void tst_SimpleLexer::bytes_and_utf16chars_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokens"); typedef QByteArray _; // LATIN1 Identifier QTest::newRow("latin1 identifier") << _("var") << createToken(T_IDENTIFIER, 3, 3); // NON-LATIN1 identifier (code point with 2 UTF8 code units) QTest::newRow("non-latin1 identifier (2-byte code unit at start)") << _(UC_U00FC "_var") << createToken(T_IDENTIFIER, 6, 5); QTest::newRow("non-latin1 identifier (2-byte code unit in center)") << _("_v" UC_U00FC "r_") << createToken(T_IDENTIFIER, 6, 5); QTest::newRow("non-latin1 identifier (2-byte code unit at end)") << _("var_" UC_U00FC) << createToken(T_IDENTIFIER, 6, 5); QTest::newRow("non-latin1 identifier (2-byte code unit only)") << _(UC_U00FC) << createToken(T_IDENTIFIER, 2, 1); // NON-LATIN1 identifier (code point with 3 UTF8 code units) QTest::newRow("non-latin1 identifier (3-byte code unit at start)") << _(UC_U4E8C "_var") << createToken(T_IDENTIFIER, 7, 5); QTest::newRow("non-latin1 identifier (3-byte code unit in center)") << _("_v" UC_U4E8C "r_") << createToken(T_IDENTIFIER, 7, 5); QTest::newRow("non-latin1 identifier (3-byte code unit at end)") << _("var_" UC_U4E8C) << createToken(T_IDENTIFIER, 7, 5); QTest::newRow("non-latin1 identifier (3-byte code unit only)") << _(UC_U4E8C) << createToken(T_IDENTIFIER, 3, 1); // NON-LATIN1 identifier (code point with 4 UTF8 code units) QTest::newRow("non-latin1 identifier (4-byte code unit at start)") << _(UC_U10302 "_var") << createToken(T_IDENTIFIER, 8, 6); QTest::newRow("non-latin1 identifier (4-byte code unit in center)") << _("_v" UC_U10302 "r_") << createToken(T_IDENTIFIER, 8, 6); QTest::newRow("non-latin1 identifier (4-byte code unit at end)") << _("var_" UC_U10302) << createToken(T_IDENTIFIER, 8, 6); QTest::newRow("non-latin1 identifier (4-byte code unit only)") << _(UC_U10302) << createToken(T_IDENTIFIER, 4, 2); // NON-LATIN1 identifier (code points with several multi-byte UTF8 code units) QTest::newRow("non-latin1 identifier (mixed multi-byte code units at start)") << _(UC_U00FC UC_U4E8C UC_U10302 "_var") << createToken(T_IDENTIFIER, 13, 8); QTest::newRow("non-latin1 identifier (mixed multi-byte code units in center)") << _("_v" UC_U00FC UC_U4E8C UC_U10302 "r_") << createToken(T_IDENTIFIER, 13, 8); QTest::newRow("non-latin1 identifier (mixed multi-byte code units at end)") << _("var_" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 13, 8); QTest::newRow("non-latin1 identifier (mixed multi-byte code units only)") << _(UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 9, 4); // Comments QTest::newRow("ascii comment /* ... */") << _("/* hello world */") << createToken(T_COMMENT, 17, 17); QTest::newRow("latin1 comment //") << _("// hello world") << createToken(T_CPP_COMMENT, 14, 14); QTest::newRow("non-latin1 comment /* ... */ (1)") << _("/* " UC_U00FC UC_U4E8C UC_U10302 " */") << createToken(T_COMMENT, 15, 10); QTest::newRow("non-latin1 comment /* ... */ (2)") << _("/*" UC_U00FC UC_U4E8C UC_U10302 "*/") << createToken(T_COMMENT, 13, 8); QTest::newRow("non-latin1 comment // (1)") << _("// " UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 12, 7); QTest::newRow("non-latin1 comment // (2)") << _("//" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 11, 6); // String Literals QTest::newRow("latin1 string literal") << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7); QTest::newRow("non-latin1 string literal") << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6); } void tst_SimpleLexer::user_defined_literals() { QFETCH(QByteArray, source); QFETCH(Tokens, expectedTokens); const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars | CompareUserDefinedLiteral; LanguageFeatures languageFeatures; languageFeatures.cxx11Enabled = true; languageFeatures.cxxEnabled = true; run(source, expectedTokens, false, compareFlags, false, languageFeatures); } void tst_SimpleLexer::user_defined_literals_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokens"); typedef QByteArray _; // String User-defined Literals QTest::newRow("latin1 string non-user-defined literal") << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7, false); QTest::newRow("latin1 string user-defined literal") << _("\"hello\"_udl") << createToken(T_STRING_LITERAL, 11, 11, true); // Numeric User-defined Literals QTest::newRow("numeric non user-defined literal with integer suffix") << _("11LL") << createToken(T_NUMERIC_LITERAL, 4, 4, false); QTest::newRow("numeric non user-defined literal with decimal part") << _("11.1") << createToken(T_NUMERIC_LITERAL, 4, 4, false); QTest::newRow("numeric non user-defined literal with float suffix") << _("11.1f") << createToken(T_NUMERIC_LITERAL, 5, 5, false); QTest::newRow("numeric user-defined literal without decimal part") << _("11_udl") << createToken(T_NUMERIC_LITERAL, 6, 6, true); QTest::newRow("numeric user-defined literal with decimal part") << _("11.1_udl") << createToken(T_NUMERIC_LITERAL, 8, 8, true); QTest::newRow("numeric user-defined reserved literal") << _("11ms") << createToken(T_NUMERIC_LITERAL, 4, 4, true); } static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes, unsigned utf16charsOffset, unsigned utf16chars) { Token t; t.f.kind = kind; t.byteOffset = byteOffset; t.f.bytes = bytes; t.utf16charOffset = utf16charsOffset; t.f.utf16chars = utf16chars; return t; } void tst_SimpleLexer::offsets() { QFETCH(QByteArray, source); QFETCH(Tokens, expectedTokens); const TokenCompareFlags compareFlags = CompareKind | CompareBytesBegin | CompareBytesEnd | CompareUtf16CharsBegin | CompareUtf16CharsEnd ; LanguageFeatures languageFeatures; languageFeatures.cxxEnabled = true; run(source, expectedTokens, false, compareFlags, false, languageFeatures); } void tst_SimpleLexer::offsets_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokens"); typedef QByteArray _; // LATIN1 Identifier QTest::newRow("latin1 identifiers") << _("var var") << (Tokens() << createToken(T_IDENTIFIER, 0, 3, 0, 3) << createToken(T_IDENTIFIER, 4, 3, 4, 3) ); // NON-LATIN1 identifier QTest::newRow("non-latin1 identifiers 1") << _("var_" UC_U00FC " var_" UC_U00FC) << (Tokens() << createToken(T_IDENTIFIER, 0, 6, 0, 5) << createToken(T_IDENTIFIER, 7, 6, 6, 5) ); QTest::newRow("non-latin1 identifiers 2") << _(UC_U00FC UC_U4E8C UC_U10302 " " UC_U00FC UC_U4E8C UC_U10302) << (Tokens() << createToken(T_IDENTIFIER, 0, 9, 0, 4) << createToken(T_IDENTIFIER, 10, 9, 5, 4) ); QTest::newRow("non-latin1 identifiers 3") // first code unit on line: / << _("class v" UC_U00FC UC_U4E8C UC_U10302 "\n" // 0 / 0 "{\n" // 17 / 12 "public:\n" // 19 / 14 " v" UC_U00FC UC_U4E8C UC_U10302 "();\n" // 27 / 22 "};\n") << (Tokens() // 45 / 35 << createToken(T_CLASS, 0, 5, 0, 5) // class << createToken(T_IDENTIFIER, 6, 10, 6, 5) // non-latin1 id << createToken(T_LBRACE, 17, 1, 12, 1) // { << createToken(T_PUBLIC, 19, 6, 14, 6) // public << createToken(T_COLON, 25, 1, 20, 1) // : << createToken(T_IDENTIFIER, 31, 10, 26, 5) // id << createToken(T_LPAREN, 41, 1, 31, 1) // ( << createToken(T_RPAREN, 42, 1, 32, 1) // ) << createToken(T_SEMICOLON, 43, 1, 33, 1) // ; << createToken(T_RBRACE, 45, 1, 35, 1) // } << createToken(T_SEMICOLON, 46, 1, 36, 1) // ; ); } void tst_SimpleLexer::incremental() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); run(source, toTokens(expectedTokenKindList), true, CompareKind); } void tst_SimpleLexer::incremental_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QTest::newRow("simple_string_literal") << _("\"foo\"") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("unterminated_string_literal") << _("\"foo") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_1") << _("\"foo \\") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_2") << _("bar\"") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_spaces_1") << _("\"foo \\ ") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_spaces_2") << _("bar\"") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("double_escaped_string_literal_1") << _("\"foo \\") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("double_escaped_string_literal_2") << _("bar \\") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("double_escaped_string_literal_3") << _("baz\"") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("unterminated_escaped_string_literal") << _("\"foo \\\n\nbar\"") << (TokenKindList() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_newline_1") << _("\"foo \\") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_newline_2") << _("") << TokenKindList(); QTest::newRow("escaped_string_literal_with_newline_3") << _("bar") << (TokenKindList() << T_IDENTIFIER); QTest::newRow("escaped_string_literal_with_space_and_newline_single") << _("\"foo \\ \n bar\"") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_space_and_newline_1") << _("\"foo \\ \n ") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_space_and_newline_2") << _("bar") << (TokenKindList() << T_IDENTIFIER); QTest::newRow("token_after_escaped_string_literal_1") << _("\"foo \\") << (TokenKindList() << T_STRING_LITERAL); QTest::newRow("token_after_escaped_string_literal_2") << _("bar\";") << (TokenKindList() << T_STRING_LITERAL << T_SEMICOLON); QTest::newRow("multiline_raw_string_literal_1") << _("R\"delim(foo") << (TokenKindList() << T_RAW_STRING_LITERAL); QTest::newRow("multiline_raw_string_literal_2") << _("bar)delim\"") << (TokenKindList() << T_RAW_STRING_LITERAL); QTest::newRow("token_after_raw_string_literal_1") << _("R\"delim( )delim\"") << (TokenKindList() << T_RAW_STRING_LITERAL); QTest::newRow("token_after_raw_string_literal_2") << _(";") << (TokenKindList() << T_SEMICOLON); QTest::newRow("simple_cpp_comment") << _("//foo") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_1") << _("//foo \\") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_2") << _("bar") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_spaces_1") << _("//foo \\ ") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_spaces_2") << _("bar") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("double_escaped_cpp_comment_1") << _("//foo \\") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("double_escaped_cpp_comment_2") << _("bar \\") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("double_escaped_cpp_comment_3") << _("baz") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_newline") << _("//foo \\\n\nbar") << (TokenKindList() << T_CPP_COMMENT << T_IDENTIFIER); QTest::newRow("escaped_cpp_comment_with_newline_1") << _("//foo \\") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_newline_2") << _("") << TokenKindList(); QTest::newRow("escaped_cpp_comment_with_newline_3") << _("bar") << (TokenKindList() << T_IDENTIFIER); QTest::newRow("escaped_cpp_comment_with_space_and_newline_single") << _("//foo \\ \n bar") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_space_and_newline_1") << _("//foo \\ \n ") << (TokenKindList() << T_CPP_COMMENT); QTest::newRow("escaped_cpp_comment_with_space_and_newline_2") << _("bar") << (TokenKindList() << T_IDENTIFIER); } void tst_SimpleLexer::digraph() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); run(source, toTokens(expectedTokenKindList), false, CompareKind); } void tst_SimpleLexer::digraph_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QTest::newRow("lbracket_digraph") << _("<:") << (TokenKindList() << T_LBRACKET); QTest::newRow("rbracket_digraph") << _(":>") << (TokenKindList() << T_RBRACKET); QTest::newRow("lbrace_digraph") << _("<%") << (TokenKindList() << T_LBRACE); QTest::newRow("rbrace_digraph") << _("%>") << (TokenKindList() << T_RBRACE); QTest::newRow("pound_digraph") << _("%:") << (TokenKindList() << T_POUND); QTest::newRow("pound_pound_digraph") << _("%:%:") << (TokenKindList() << T_POUND_POUND); QTest::newRow("pound_pound_mixed_digraph_1") << _("#%:") << (TokenKindList() << T_POUND << T_POUND); QTest::newRow("pound_pound_mixed_digraph_2") << _("%:#") << (TokenKindList() << T_POUND << T_POUND); QTest::newRow("lbracket_digraph_exception1") << _("<::") << (TokenKindList() << T_LESS << T_COLON_COLON); QTest::newRow("lbracket_digraph_exception2") << _("<::x") << (TokenKindList() << T_LESS << T_COLON_COLON << T_IDENTIFIER); QTest::newRow("lbracket_digraph_exception3") << _("<:::") << (TokenKindList() << T_LBRACKET << T_COLON_COLON); QTest::newRow("lbracket_digraph_exception4") << _("<::>") << (TokenKindList() << T_LBRACKET << T_RBRACKET); } void tst_SimpleLexer::trigraph() { QFETCH(QByteArray, source); QFETCH(TokenKindList, expectedTokenKindList); run(source, toTokens(expectedTokenKindList), false, CompareKind, true); } void tst_SimpleLexer::trigraph_data() { QTest::addColumn("source"); QTest::addColumn("expectedTokenKindList"); QTest::newRow("pound_trigraph") << _("?" "?=") << (TokenKindList() << T_POUND); QTest::newRow("caret_trigraph") << _("?" "?'") << (TokenKindList() << T_CARET); QTest::newRow("left_bracket_trigraph") << _("?" "?(") << (TokenKindList() << T_LBRACKET); QTest::newRow("right_bracket_trigraph") << _("?" "?)") << (TokenKindList() << T_RBRACKET); QTest::newRow("pipe_trigraph") << _("?" "?!") << (TokenKindList() << T_PIPE); QTest::newRow("left_brace_trigraph") << _("?" "?<") << (TokenKindList() << T_LBRACE); QTest::newRow("right_brace_trigraph") << _("?" "?>") << (TokenKindList() << T_RBRACE); QTest::newRow("tilde_trigraph") << _("?" "?-") << (TokenKindList() << T_TILDE); QTest::newRow("pound_pound_trigraph") << _("?" "?=" "?" "?=") << (TokenKindList() << T_POUND_POUND); QTest::newRow("caret_equal_trigraph") << _("?" "?'=") << (TokenKindList() << T_CARET_EQUAL); QTest::newRow("pipe_equal_trigraph") << _("?" "?!=") << (TokenKindList() << T_PIPE_EQUAL); QTest::newRow("tilde_equal_trigraph") << _("?" "?-=") << (TokenKindList() << T_TILDE_EQUAL); } QTEST_APPLESS_MAIN(tst_SimpleLexer) #include "tst_lexer.moc"