diff options
author | Simon Hausmann <simon.hausmann@nokia.com> | 2012-05-07 11:21:11 +0200 |
---|---|---|
committer | Simon Hausmann <simon.hausmann@nokia.com> | 2012-05-07 11:21:11 +0200 |
commit | 2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47 (patch) | |
tree | 988e8c5b116dd0466244ae2fe5af8ee9be926d76 /Source/JavaScriptCore/parser/Lexer.cpp | |
parent | dd91e772430dc294e3bf478c119ef8d43c0a3358 (diff) | |
download | qtwebkit-2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47.tar.gz |
Imported WebKit commit 7e538425aa020340619e927792f3d895061fb54b (http://svn.webkit.org/repository/webkit/trunk@116286)
Diffstat (limited to 'Source/JavaScriptCore/parser/Lexer.cpp')
-rw-r--r-- | Source/JavaScriptCore/parser/Lexer.cpp | 168 |
1 files changed, 84 insertions, 84 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.cpp b/Source/JavaScriptCore/parser/Lexer.cpp index 8d50afc54..3b020f4f2 100644 --- a/Source/JavaScriptCore/parser/Lexer.cpp +++ b/Source/JavaScriptCore/parser/Lexer.cpp @@ -1,6 +1,6 @@ /* * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) - * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. + * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved. * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu) * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be) @@ -368,7 +368,7 @@ Lexer<T>::~Lexer() } template <typename T> -UString Lexer<T>::getInvalidCharMessage() +UString Lexer<T>::invalidCharacterMessage() const { switch (m_current) { case 0: @@ -386,7 +386,7 @@ UString Lexer<T>::getInvalidCharMessage() case 96: return "Invalid character: '`'"; default: - return String::format("Invalid character '\\u%04u'", m_current).impl(); + return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl(); } } @@ -403,7 +403,6 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena) m_arena = &arena->identifierArena(); m_lineNumber = source.firstLine(); - m_delimited = false; m_lastToken = -1; const StringImpl* sourceString = source.provider()->data(); @@ -426,7 +425,7 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena) if (LIKELY(m_code < m_codeEnd)) m_current = *m_code; else - m_current = -1; + m_current = 0; ASSERT(currentOffset() == source.startOffset()); } @@ -440,29 +439,34 @@ template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift() template <typename T> ALWAYS_INLINE void Lexer<T>::shift() { - // Faster than an if-else sequence - ASSERT(m_current != -1); - m_current = -1; - m_code++; + // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence. + m_current = 0; + ++m_code; if (LIKELY(m_code < m_codeEnd)) m_current = *m_code; } template <typename T> -ALWAYS_INLINE int Lexer<T>::peek(int offset) +ALWAYS_INLINE bool Lexer<T>::atEnd() const +{ + ASSERT(!m_current || m_code < m_codeEnd); + return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd); +} + +template <typename T> +ALWAYS_INLINE T Lexer<T>::peek(int offset) const { - // Only use if necessary ASSERT(offset > 0 && offset < 5); const T* code = m_code + offset; - return (code < m_codeEnd) ? *code : -1; + return (code < m_codeEnd) ? *code : 0; } template <typename T> -int Lexer<T>::getUnicodeCharacter() +int Lexer<T>::parseFourDigitUnicodeHex() { - int char1 = peek(1); - int char2 = peek(2); - int char3 = peek(3); + T char1 = peek(1); + T char2 = peek(2); + T char3 = peek(3); if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3))) return -1; @@ -478,13 +482,13 @@ int Lexer<T>::getUnicodeCharacter() template <typename T> void Lexer<T>::shiftLineTerminator() { - ASSERT(isLineTerminator(static_cast<T>(m_current))); + ASSERT(isLineTerminator(m_current)); - int m_prev = m_current; + T prev = m_current; shift(); // Allow both CRLF and LFCR. - if (m_prev + m_current == '\n' + '\r') + if (prev + m_current == '\n' + '\r') shift(); ++m_lineNumber; @@ -629,12 +633,12 @@ template <typename T> inline void Lexer<T>::record16(int c) { ASSERT(c >= 0); - ASSERT(c <= USHRT_MAX); + ASSERT(c <= static_cast<int>(USHRT_MAX)); m_buffer16.append(static_cast<UChar>(c)); } template <> - template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) +template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) { const ptrdiff_t remaining = m_codeEnd - m_code; if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) { @@ -647,7 +651,7 @@ template <> const LChar* identifierStart = currentCharacter(); - while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current))) + while (isIdentPart(m_current)) shift(); if (UNLIKELY(m_current == '\\')) { @@ -665,8 +669,6 @@ template <> } else tokenData->ident = 0; - m_delimited = false; - if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { ASSERT(shouldCreateIdentifier); if (remaining < maxTokenLength) { @@ -694,11 +696,12 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword; } } + const UChar* identifierStart = currentCharacter(); UChar orAllChars = 0; - while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) { + while (isIdentPart(m_current)) { orAllChars |= m_current; shift(); } @@ -726,8 +729,6 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p } else tokenData->ident = 0; - m_delimited = false; - if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { ASSERT(shouldCreateIdentifier); if (remaining < maxTokenLength) { @@ -752,7 +753,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow bool bufferRequired = false; while (true) { - if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) { + if (LIKELY(isIdentPart(m_current))) { shift(); continue; } @@ -767,7 +768,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow if (UNLIKELY(m_current != 'u')) return ERRORTOK; shift(); - int character = getUnicodeCharacter(); + int character = parseFourDigitUnicodeHex(); if (UNLIKELY(character == -1)) return ERRORTOK; UChar ucharacter = static_cast<UChar>(character); @@ -794,8 +795,6 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow } else tokenData->ident = 0; - m_delimited = false; - if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { ASSERT(shouldCreateIdentifier); // Keywords must not be recognized if there was an \uXXXX in the identifier. @@ -814,18 +813,28 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow return IDENT; } +static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character) +{ + return character < 0xE; +} + +static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character) +{ + return character < 0xE || character > 0xFF; +} + template <typename T> template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode) { int startingOffset = currentOffset(); int startingLineNumber = lineNumber(); - int stringQuoteCharacter = m_current; + T stringQuoteCharacter = m_current; shift(); const T* stringStart = currentCharacter(); while (m_current != stringQuoteCharacter) { - if (UNLIKELY((m_current == '\\'))) { + if (UNLIKELY(m_current == '\\')) { if (stringStart != currentCharacter() && shouldBuildStrings) append8(stringStart, currentCharacter() - stringStart); shift(); @@ -845,7 +854,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo m_lexErrorMessage = "\\x can only be followed by a hex character sequence"; return false; } - int prev = m_current; + T prev = m_current; shift(); if (shouldBuildStrings) record8(convertHex(prev, m_current)); @@ -860,7 +869,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo continue; } - if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) { + if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) { setOffset(startingOffset); setLineNumber(startingLineNumber); m_buffer8.resize(0); @@ -884,7 +893,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo template <typename T> template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) { - int stringQuoteCharacter = m_current; + T stringQuoteCharacter = m_current; shift(); const T* stringStart = currentCharacter(); @@ -902,7 +911,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat if (shouldBuildStrings) record16(escape); shift(); - } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) + } else if (UNLIKELY(isLineTerminator(m_current))) shiftLineTerminator(); else if (m_current == 'x') { shift(); @@ -910,14 +919,14 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat m_lexErrorMessage = "\\x can only be followed by a hex character sequence"; return false; } - int prev = m_current; + T prev = m_current; shift(); if (shouldBuildStrings) record16(convertHex(prev, m_current)); shift(); } else if (m_current == 'u') { shift(); - int character = getUnicodeCharacter(); + int character = parseFourDigitUnicodeHex(); if (character != -1) { if (shouldBuildStrings) record16(character); @@ -940,11 +949,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat record16(0); } else if (!strictMode && isASCIIOctalDigit(m_current)) { // Octal character sequences - int character1 = m_current; + T character1 = m_current; shift(); if (isASCIIOctalDigit(m_current)) { // Two octal characters - int character2 = m_current; + T character2 = m_current; shift(); if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) { if (shouldBuildStrings) @@ -958,7 +967,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat if (shouldBuildStrings) record16(character1 - '0'); } - } else if (m_current != -1) { + } else if (!atEnd()) { if (shouldBuildStrings) record16(m_current); shift(); @@ -971,11 +980,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat continue; } // Fast check for characters that require special handling. - // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently + // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently // as possible, and lets through all common ASCII characters. if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { // New-line or end of input is not allowed - if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) { + if (atEnd() || isLineTerminator(m_current)) { m_lexErrorMessage = "Unexpected EOF"; return false; } @@ -1152,10 +1161,10 @@ ALWAYS_INLINE bool Lexer<T>::parseMultilineComment() } } - if (UNLIKELY(m_current == -1)) + if (atEnd()) return false; - if (isLineTerminator(static_cast<T>(m_current))) { + if (isLineTerminator(m_current)) { shiftLineTerminator(); m_terminator = true; } else @@ -1184,22 +1193,20 @@ JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsign m_terminator = false; start: - while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current))) + while (isWhiteSpace(m_current)) shift(); - int startOffset = currentOffset(); - - if (UNLIKELY(m_current == -1)) + if (atEnd()) return EOFTOK; - - m_delimited = false; + + tokenInfo->startOffset = currentOffset(); CharacterType type; - if (LIKELY(isLatin1(static_cast<T>(m_current)))) + if (LIKELY(isLatin1(m_current))) type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]); else if (isNonLatin1IdentStart(m_current)) type = CharacterIdentifierStart; - else if (isLineTerminator(static_cast<T>(m_current))) + else if (isLineTerminator(m_current)) type = CharacterLineTerminator; else type = CharacterInvalid; @@ -1425,7 +1432,6 @@ start: shift(); break; case CharacterSemicolon: - m_delimited = true; shift(); token = SEMICOLON; break; @@ -1436,7 +1442,6 @@ start: break; case CharacterCloseBrace: tokenData->intValue = currentOffset(); - m_delimited = true; shift(); token = CLOSEBRACE; break; @@ -1473,25 +1478,24 @@ start: inNumberAfterDecimalPoint: parseNumberAfterDecimalPoint(); } - if ((m_current | 0x20) == 'e') + if ((m_current | 0x20) == 'e') { if (!parseNumberAfterExponentIndicator()) { m_lexErrorMessage = "Non-number found after exponent indicator"; goto returnError; } - // Null-terminate string for strtod. - m_buffer8.append('\0'); - tokenData->doubleValue = WTF::strtod<WTF::AllowTrailingJunk>(reinterpret_cast<const char*>(m_buffer8.data()), 0); + } + size_t parsedLength; + tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength); } token = NUMBER; } // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. - if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) { + if (UNLIKELY(isIdentStart(m_current))) { m_lexErrorMessage = "At least one digit must occur after a decimal point"; goto returnError; } m_buffer8.resize(0); - m_delimited = false; break; case CharacterQuote: if (lexerFlags & LexerFlagsDontBuildStrings) { @@ -1502,11 +1506,10 @@ inNumberAfterDecimalPoint: goto returnError; } shift(); - m_delimited = false; token = STRING; break; case CharacterIdentifierStart: - ASSERT(isIdentStart(static_cast<T>(m_current))); + ASSERT(isIdentStart(m_current)); // Fall through into CharacterBackSlash. case CharacterBackSlash: if (lexerFlags & LexexFlagsDontBuildKeywords) @@ -1515,13 +1518,13 @@ inNumberAfterDecimalPoint: token = parseIdentifier<true>(tokenData, lexerFlags, strictMode); break; case CharacterLineTerminator: - ASSERT(isLineTerminator(static_cast<T>(m_current))); + ASSERT(isLineTerminator(m_current)); shiftLineTerminator(); m_atLineStart = true; m_terminator = true; goto start; case CharacterInvalid: - m_lexErrorMessage = getInvalidCharMessage(); + m_lexErrorMessage = invalidCharacterMessage(); goto returnError; default: ASSERT_NOT_REACHED(); @@ -1533,8 +1536,8 @@ inNumberAfterDecimalPoint: goto returnToken; inSingleLineComment: - while (!isLineTerminator(static_cast<T>(m_current))) { - if (UNLIKELY(m_current == -1)) + while (!isLineTerminator(m_current)) { + if (atEnd()) return EOFTOK; shift(); } @@ -1545,12 +1548,10 @@ inSingleLineComment: goto start; token = SEMICOLON; - m_delimited = true; // Fall through into returnToken. returnToken: tokenInfo->line = m_lineNumber; - tokenInfo->startOffset = startOffset; tokenInfo->endOffset = currentOffset(); m_lastToken = token; return token; @@ -1558,7 +1559,6 @@ returnToken: returnError: m_error = true; tokenInfo->line = m_lineNumber; - tokenInfo->startOffset = startOffset; tokenInfo->endOffset = currentOffset(); return ERRORTOK; } @@ -1579,26 +1579,26 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, } while (true) { - int current = m_current; - - if (isLineTerminator(static_cast<T>(current)) || current == -1) { + if (isLineTerminator(m_current) || atEnd()) { m_buffer16.resize(0); return false; } + T prev = m_current; + shift(); - if (current == '/' && !lastWasEscape && !inBrackets) + if (prev == '/' && !lastWasEscape && !inBrackets) break; - record16(current); + record16(prev); if (lastWasEscape) { lastWasEscape = false; continue; } - switch (current) { + switch (prev) { case '[': inBrackets = true; break; @@ -1614,7 +1614,7 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size()); m_buffer16.resize(0); - while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) { + while (isIdentPart(m_current)) { record16(m_current); shift(); } @@ -1632,14 +1632,14 @@ bool Lexer<T>::skipRegExp() bool inBrackets = false; while (true) { - int current = m_current; - - if (isLineTerminator(static_cast<T>(current)) || current == -1) + if (isLineTerminator(m_current) || atEnd()) return false; + T prev = m_current; + shift(); - if (current == '/' && !lastWasEscape && !inBrackets) + if (prev == '/' && !lastWasEscape && !inBrackets) break; if (lastWasEscape) { @@ -1647,7 +1647,7 @@ bool Lexer<T>::skipRegExp() continue; } - switch (current) { + switch (prev) { case '[': inBrackets = true; break; @@ -1660,7 +1660,7 @@ bool Lexer<T>::skipRegExp() } } - while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) + while (isIdentPart(m_current)) shift(); return true; |