Imported WebKit commit 7e538425aa020340619e927792f3d895061fb54b (http://svn.webkit.org/repository/webkit/trunk@116286)

author: Simon Hausmann <simon.hausmann@nokia.com> 2012-05-07 11:21:11 +0200
committer: Simon Hausmann <simon.hausmann@nokia.com> 2012-05-07 11:21:11 +0200
commit: 2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47 (patch)
tree: 988e8c5b116dd0466244ae2fe5af8ee9be926d76 /Source/JavaScriptCore/parser/Lexer.cpp
parent: dd91e772430dc294e3bf478c119ef8d43c0a3358 (diff)
download: qtwebkit-2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47.tar.gz
1 files changed, 84 insertions, 84 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.cpp b/Source/JavaScriptCore/parser/Lexer.cpp
index 8d50afc54..3b020f4f2 100644
--- a/Source/JavaScriptCore/parser/Lexer.cpp
+++ b/Source/JavaScriptCore/parser/Lexer.cpp
@@ -1,6 +1,6 @@
 /*
  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
+ *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved.
  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
@@ -368,7 +368,7 @@ Lexer<T>::~Lexer()
 }
 
 template <typename T>
-UString Lexer<T>::getInvalidCharMessage()
+UString Lexer<T>::invalidCharacterMessage() const
 {
     switch (m_current) {
     case 0:
@@ -386,7 +386,7 @@ UString Lexer<T>::getInvalidCharMessage()
     case 96:
         return "Invalid character: '`'";
     default:
-        return String::format("Invalid character '\\u%04u'", m_current).impl();
+        return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
     }
 }
 
@@ -403,7 +403,6 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
     m_arena = &arena->identifierArena();
     
     m_lineNumber = source.firstLine();
-    m_delimited = false;
     m_lastToken = -1;
     
     const StringImpl* sourceString = source.provider()->data();
@@ -426,7 +425,7 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
     if (LIKELY(m_code < m_codeEnd))
         m_current = *m_code;
     else
-        m_current = -1;
+        m_current = 0;
     ASSERT(currentOffset() == source.startOffset());
 }
 
@@ -440,29 +439,34 @@ template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
 template <typename T>
 ALWAYS_INLINE void Lexer<T>::shift()
 {
-    // Faster than an if-else sequence
-    ASSERT(m_current != -1);
-    m_current = -1;
-    m_code++;
+    // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
+    m_current = 0;
+    ++m_code;
     if (LIKELY(m_code < m_codeEnd))
         m_current = *m_code;
 }
 
 template <typename T>
-ALWAYS_INLINE int Lexer<T>::peek(int offset)
+ALWAYS_INLINE bool Lexer<T>::atEnd() const
+{
+    ASSERT(!m_current || m_code < m_codeEnd);
+    return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
+}
+
+template <typename T>
+ALWAYS_INLINE T Lexer<T>::peek(int offset) const
 {
-    // Only use if necessary
     ASSERT(offset > 0 && offset < 5);
     const T* code = m_code + offset;
-    return (code < m_codeEnd) ? *code : -1;
+    return (code < m_codeEnd) ? *code : 0;
 }
 
 template <typename T>
-int Lexer<T>::getUnicodeCharacter()
+int Lexer<T>::parseFourDigitUnicodeHex()
 {
-    int char1 = peek(1);
-    int char2 = peek(2);
-    int char3 = peek(3);
+    T char1 = peek(1);
+    T char2 = peek(2);
+    T char3 = peek(3);
 
     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
         return -1;
@@ -478,13 +482,13 @@ int Lexer<T>::getUnicodeCharacter()
 template <typename T>
 void Lexer<T>::shiftLineTerminator()
 {
-    ASSERT(isLineTerminator(static_cast<T>(m_current)));
+    ASSERT(isLineTerminator(m_current));
 
-    int m_prev = m_current;
+    T prev = m_current;
     shift();
 
     // Allow both CRLF and LFCR.
-    if (m_prev + m_current == '\n' + '\r')
+    if (prev + m_current == '\n' + '\r')
         shift();
 
     ++m_lineNumber;
@@ -629,12 +633,12 @@ template <typename T>
 inline void Lexer<T>::record16(int c)
 {
     ASSERT(c >= 0);
-    ASSERT(c <= USHRT_MAX);
+    ASSERT(c <= static_cast<int>(USHRT_MAX));
     m_buffer16.append(static_cast<UChar>(c));
 }
 
 template <>
-    template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 {
     const ptrdiff_t remaining = m_codeEnd - m_code;
     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
@@ -647,7 +651,7 @@ template <>
 
     const LChar* identifierStart = currentCharacter();
     
-    while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
+    while (isIdentPart(m_current))
         shift();
     
     if (UNLIKELY(m_current == '\\')) {
@@ -665,8 +669,6 @@ template <>
     } else
         tokenData->ident = 0;
 
-    m_delimited = false;
-
     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
         ASSERT(shouldCreateIdentifier);
         if (remaining < maxTokenLength) {
@@ -694,11 +696,12 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
         }
     }
+
     const UChar* identifierStart = currentCharacter();
 
     UChar orAllChars = 0;
     
-    while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
+    while (isIdentPart(m_current)) {
         orAllChars |= m_current;
         shift();
     }
@@ -726,8 +729,6 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
     } else
         tokenData->ident = 0;
     
-    m_delimited = false;
-    
     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
         ASSERT(shouldCreateIdentifier);
         if (remaining < maxTokenLength) {
@@ -752,7 +753,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
     bool bufferRequired = false;
 
     while (true) {
-        if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
+        if (LIKELY(isIdentPart(m_current))) {
             shift();
             continue;
         }
@@ -767,7 +768,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
         if (UNLIKELY(m_current != 'u'))
             return ERRORTOK;
         shift();
-        int character = getUnicodeCharacter();
+        int character = parseFourDigitUnicodeHex();
         if (UNLIKELY(character == -1))
             return ERRORTOK;
         UChar ucharacter = static_cast<UChar>(character);
@@ -794,8 +795,6 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
     } else
         tokenData->ident = 0;
 
-    m_delimited = false;
-
     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
         ASSERT(shouldCreateIdentifier);
         // Keywords must not be recognized if there was an \uXXXX in the identifier.
@@ -814,18 +813,28 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
     return IDENT;
 }
 
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
+{
+    return character < 0xE;
+}
+
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
+{
+    return character < 0xE || character > 0xFF;
+}
+
 template <typename T>
 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
 {
     int startingOffset = currentOffset();
     int startingLineNumber = lineNumber();
-    int stringQuoteCharacter = m_current;
+    T stringQuoteCharacter = m_current;
     shift();
 
     const T* stringStart = currentCharacter();
 
     while (m_current != stringQuoteCharacter) {
-        if (UNLIKELY((m_current == '\\'))) {
+        if (UNLIKELY(m_current == '\\')) {
             if (stringStart != currentCharacter() && shouldBuildStrings)
                 append8(stringStart, currentCharacter() - stringStart);
             shift();
@@ -845,7 +854,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
                     return false;
                 }
-                int prev = m_current;
+                T prev = m_current;
                 shift();
                 if (shouldBuildStrings)
                     record8(convertHex(prev, m_current));
@@ -860,7 +869,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
             continue;
         }
 
-        if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) {
+        if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
             setOffset(startingOffset);
             setLineNumber(startingLineNumber);
             m_buffer8.resize(0);
@@ -884,7 +893,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
 template <typename T>
 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
 {
-    int stringQuoteCharacter = m_current;
+    T stringQuoteCharacter = m_current;
     shift();
 
     const T* stringStart = currentCharacter();
@@ -902,7 +911,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
                 if (shouldBuildStrings)
                     record16(escape);
                 shift();
-            } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
+            } else if (UNLIKELY(isLineTerminator(m_current)))
                 shiftLineTerminator();
             else if (m_current == 'x') {
                 shift();
@@ -910,14 +919,14 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
                     return false;
                 }
-                int prev = m_current;
+                T prev = m_current;
                 shift();
                 if (shouldBuildStrings)
                     record16(convertHex(prev, m_current));
                 shift();
             } else if (m_current == 'u') {
                 shift();
-                int character = getUnicodeCharacter();
+                int character = parseFourDigitUnicodeHex();
                 if (character != -1) {
                     if (shouldBuildStrings)
                         record16(character);
@@ -940,11 +949,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
                     record16(0);
             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
                 // Octal character sequences
-                int character1 = m_current;
+                T character1 = m_current;
                 shift();
                 if (isASCIIOctalDigit(m_current)) {
                     // Two octal characters
-                    int character2 = m_current;
+                    T character2 = m_current;
                     shift();
                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
                         if (shouldBuildStrings)
@@ -958,7 +967,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
                     if (shouldBuildStrings)
                         record16(character1 - '0');
                 }
-            } else if (m_current != -1) {
+            } else if (!atEnd()) {
                 if (shouldBuildStrings)
                     record16(m_current);
                 shift();
@@ -971,11 +980,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
             continue;
         }
         // Fast check for characters that require special handling.
-        // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
+        // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
         // as possible, and lets through all common ASCII characters.
         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
             // New-line or end of input is not allowed
-            if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
+            if (atEnd() || isLineTerminator(m_current)) {
                 m_lexErrorMessage = "Unexpected EOF";
                 return false;
             }
@@ -1152,10 +1161,10 @@ ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
             }
         }
 
-        if (UNLIKELY(m_current == -1))
+        if (atEnd())
             return false;
 
-        if (isLineTerminator(static_cast<T>(m_current))) {
+        if (isLineTerminator(m_current)) {
             shiftLineTerminator();
             m_terminator = true;
         } else
@@ -1184,22 +1193,20 @@ JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsign
     m_terminator = false;
 
 start:
-    while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
+    while (isWhiteSpace(m_current))
         shift();
 
-    int startOffset = currentOffset();
-
-    if (UNLIKELY(m_current == -1))
+    if (atEnd())
         return EOFTOK;
-
-    m_delimited = false;
+    
+    tokenInfo->startOffset = currentOffset();
 
     CharacterType type;
-    if (LIKELY(isLatin1(static_cast<T>(m_current))))
+    if (LIKELY(isLatin1(m_current)))
         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
     else if (isNonLatin1IdentStart(m_current))
         type = CharacterIdentifierStart;
-    else if (isLineTerminator(static_cast<T>(m_current)))
+    else if (isLineTerminator(m_current))
         type = CharacterLineTerminator;
     else
         type = CharacterInvalid;
@@ -1425,7 +1432,6 @@ start:
         shift();
         break;
     case CharacterSemicolon:
-        m_delimited = true;
         shift();
         token = SEMICOLON;
         break;
@@ -1436,7 +1442,6 @@ start:
         break;
     case CharacterCloseBrace:
         tokenData->intValue = currentOffset();
-        m_delimited = true;
         shift();
         token = CLOSEBRACE;
         break;
@@ -1473,25 +1478,24 @@ start:
 inNumberAfterDecimalPoint:
                     parseNumberAfterDecimalPoint();
                 }
-                if ((m_current | 0x20) == 'e')
+                if ((m_current | 0x20) == 'e') {
                     if (!parseNumberAfterExponentIndicator()) {
                         m_lexErrorMessage = "Non-number found after exponent indicator";
                         goto returnError;
                     }
-                // Null-terminate string for strtod.
-                m_buffer8.append('\0');
-                tokenData->doubleValue = WTF::strtod<WTF::AllowTrailingJunk>(reinterpret_cast<const char*>(m_buffer8.data()), 0);
+                }
+                size_t parsedLength;
+                tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
             }
             token = NUMBER;
         }
 
         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
-        if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
+        if (UNLIKELY(isIdentStart(m_current))) {
             m_lexErrorMessage = "At least one digit must occur after a decimal point";
             goto returnError;
         }
         m_buffer8.resize(0);
-        m_delimited = false;
         break;
     case CharacterQuote:
         if (lexerFlags & LexerFlagsDontBuildStrings) {
@@ -1502,11 +1506,10 @@ inNumberAfterDecimalPoint:
                 goto returnError;
         }
         shift();
-        m_delimited = false;
         token = STRING;
         break;
     case CharacterIdentifierStart:
-        ASSERT(isIdentStart(static_cast<T>(m_current)));
+        ASSERT(isIdentStart(m_current));
         // Fall through into CharacterBackSlash.
     case CharacterBackSlash:
         if (lexerFlags & LexexFlagsDontBuildKeywords)
@@ -1515,13 +1518,13 @@ inNumberAfterDecimalPoint:
             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
         break;
     case CharacterLineTerminator:
-        ASSERT(isLineTerminator(static_cast<T>(m_current)));
+        ASSERT(isLineTerminator(m_current));
         shiftLineTerminator();
         m_atLineStart = true;
         m_terminator = true;
         goto start;
     case CharacterInvalid:
-        m_lexErrorMessage = getInvalidCharMessage();
+        m_lexErrorMessage = invalidCharacterMessage();
         goto returnError;
     default:
         ASSERT_NOT_REACHED();
@@ -1533,8 +1536,8 @@ inNumberAfterDecimalPoint:
     goto returnToken;
 
 inSingleLineComment:
-    while (!isLineTerminator(static_cast<T>(m_current))) {
-        if (UNLIKELY(m_current == -1))
+    while (!isLineTerminator(m_current)) {
+        if (atEnd())
             return EOFTOK;
         shift();
     }
@@ -1545,12 +1548,10 @@ inSingleLineComment:
         goto start;
 
     token = SEMICOLON;
-    m_delimited = true;
     // Fall through into returnToken.
 
 returnToken:
     tokenInfo->line = m_lineNumber;
-    tokenInfo->startOffset = startOffset;
     tokenInfo->endOffset = currentOffset();
     m_lastToken = token;
     return token;
@@ -1558,7 +1559,6 @@ returnToken:
 returnError:
     m_error = true;
     tokenInfo->line = m_lineNumber;
-    tokenInfo->startOffset = startOffset;
     tokenInfo->endOffset = currentOffset();
     return ERRORTOK;
 }
@@ -1579,26 +1579,26 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
     }
 
     while (true) {
-        int current = m_current;
-
-        if (isLineTerminator(static_cast<T>(current)) || current == -1) {
+        if (isLineTerminator(m_current) || atEnd()) {
             m_buffer16.resize(0);
             return false;
         }
 
+        T prev = m_current;
+        
         shift();
 
-        if (current == '/' && !lastWasEscape && !inBrackets)
+        if (prev == '/' && !lastWasEscape && !inBrackets)
             break;
 
-        record16(current);
+        record16(prev);
 
         if (lastWasEscape) {
             lastWasEscape = false;
             continue;
         }
 
-        switch (current) {
+        switch (prev) {
         case '[':
             inBrackets = true;
             break;
@@ -1614,7 +1614,7 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
     m_buffer16.resize(0);
 
-    while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
+    while (isIdentPart(m_current)) {
         record16(m_current);
         shift();
     }
@@ -1632,14 +1632,14 @@ bool Lexer<T>::skipRegExp()
     bool inBrackets = false;
 
     while (true) {
-        int current = m_current;
-
-        if (isLineTerminator(static_cast<T>(current)) || current == -1)
+        if (isLineTerminator(m_current) || atEnd())
             return false;
 
+        T prev = m_current;
+        
         shift();
 
-        if (current == '/' && !lastWasEscape && !inBrackets)
+        if (prev == '/' && !lastWasEscape && !inBrackets)
             break;
 
         if (lastWasEscape) {
@@ -1647,7 +1647,7 @@ bool Lexer<T>::skipRegExp()
             continue;
         }
 
-        switch (current) {
+        switch (prev) {
         case '[':
             inBrackets = true;
             break;
@@ -1660,7 +1660,7 @@ bool Lexer<T>::skipRegExp()
         }
     }
 
-    while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
+    while (isIdentPart(m_current))
         shift();
 
     return true;
author	Simon Hausmann <simon.hausmann@nokia.com>	2012-05-07 11:21:11 +0200
committer	Simon Hausmann <simon.hausmann@nokia.com>	2012-05-07 11:21:11 +0200
commit	2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47 (patch)
tree	988e8c5b116dd0466244ae2fe5af8ee9be926d76 /Source/JavaScriptCore/parser/Lexer.cpp
parent	dd91e772430dc294e3bf478c119ef8d43c0a3358 (diff)
download	qtwebkit-2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47.tar.gz