Imported WebKit commit 3db4eb1820ac8fb03065d7ea73a4d9db1e8fea1a (http://svn.webkit.org/repository/webkit/trunk@110422)

This includes build fixes for the latest qtbase/qtdeclarative as well as the final QML2 API.
author: Simon Hausmann <simon.hausmann@nokia.com> 2012-03-12 14:11:15 +0100
committer: Simon Hausmann <simon.hausmann@nokia.com> 2012-03-12 14:11:15 +0100
commit: dd91e772430dc294e3bf478c119ef8d43c0a3358 (patch)
tree: 6f33ce4d5872a5691e0291eb45bf6ab373a5f567 /Source/JavaScriptCore/parser/Lexer.cpp
parent: ad0d549d4cc13433f77c1ac8f0ab379c83d93f28 (diff)
download: qtwebkit-dd91e772430dc294e3bf478c119ef8d43c0a3358.tar.gz
1 file changed, 198 insertions, 47 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.cpp b/Source/JavaScriptCore/parser/Lexer.cpp
index 015c1509d..8d50afc54 100644
--- a/Source/JavaScriptCore/parser/Lexer.cpp
+++ b/Source/JavaScriptCore/parser/Lexer.cpp
@@ -31,7 +31,7 @@
 #include "Identifier.h"
 #include "NodeInfo.h"
 #include "Nodes.h"
-#include "dtoa.h"
+#include <wtf/dtoa.h>
 #include <ctype.h>
 #include <limits.h>
 #include <string.h>
@@ -95,8 +95,8 @@ enum CharacterType {
     CharacterWhiteSpace,
 };
 
-// 128 ASCII codes
-static const unsigned short typesOfASCIICharacters[128] = {
+// 256 Latin-1 codes
+static const unsigned short typesOfLatin1Characters[256] = {
 /*   0 - Null               */ CharacterInvalid,
 /*   1 - Start of Heading   */ CharacterInvalid,
 /*   2 - Start of Text      */ CharacterInvalid,
@@ -225,6 +225,134 @@ static const unsigned short typesOfASCIICharacters[128] = {
 /* 125 - }                  */ CharacterCloseBrace,
 /* 126 - ~                  */ CharacterTilde,
 /* 127 - Delete             */ CharacterInvalid,
+/* 128 - Cc category        */ CharacterInvalid,
+/* 129 - Cc category        */ CharacterInvalid,
+/* 130 - Cc category        */ CharacterInvalid,
+/* 131 - Cc category        */ CharacterInvalid,
+/* 132 - Cc category        */ CharacterInvalid,
+/* 133 - Cc category        */ CharacterInvalid,
+/* 134 - Cc category        */ CharacterInvalid,
+/* 135 - Cc category        */ CharacterInvalid,
+/* 136 - Cc category        */ CharacterInvalid,
+/* 137 - Cc category        */ CharacterInvalid,
+/* 138 - Cc category        */ CharacterInvalid,
+/* 139 - Cc category        */ CharacterInvalid,
+/* 140 - Cc category        */ CharacterInvalid,
+/* 141 - Cc category        */ CharacterInvalid,
+/* 142 - Cc category        */ CharacterInvalid,
+/* 143 - Cc category        */ CharacterInvalid,
+/* 144 - Cc category        */ CharacterInvalid,
+/* 145 - Cc category        */ CharacterInvalid,
+/* 146 - Cc category        */ CharacterInvalid,
+/* 147 - Cc category        */ CharacterInvalid,
+/* 148 - Cc category        */ CharacterInvalid,
+/* 149 - Cc category        */ CharacterInvalid,
+/* 150 - Cc category        */ CharacterInvalid,
+/* 151 - Cc category        */ CharacterInvalid,
+/* 152 - Cc category        */ CharacterInvalid,
+/* 153 - Cc category        */ CharacterInvalid,
+/* 154 - Cc category        */ CharacterInvalid,
+/* 155 - Cc category        */ CharacterInvalid,
+/* 156 - Cc category        */ CharacterInvalid,
+/* 157 - Cc category        */ CharacterInvalid,
+/* 158 - Cc category        */ CharacterInvalid,
+/* 159 - Cc category        */ CharacterInvalid,
+/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
+/* 161 - Po category        */ CharacterInvalid,
+/* 162 - Sc category        */ CharacterInvalid,
+/* 163 - Sc category        */ CharacterInvalid,
+/* 164 - Sc category        */ CharacterInvalid,
+/* 165 - Sc category        */ CharacterInvalid,
+/* 166 - So category        */ CharacterInvalid,
+/* 167 - So category        */ CharacterInvalid,
+/* 168 - Sk category        */ CharacterInvalid,
+/* 169 - So category        */ CharacterInvalid,
+/* 170 - Ll category        */ CharacterIdentifierStart,
+/* 171 - Pi category        */ CharacterInvalid,
+/* 172 - Sm category        */ CharacterInvalid,
+/* 173 - Cf category        */ CharacterInvalid,
+/* 174 - So category        */ CharacterInvalid,
+/* 175 - Sk category        */ CharacterInvalid,
+/* 176 - So category        */ CharacterInvalid,
+/* 177 - Sm category        */ CharacterInvalid,
+/* 178 - No category        */ CharacterInvalid,
+/* 179 - No category        */ CharacterInvalid,
+/* 180 - Sk category        */ CharacterInvalid,
+/* 181 - Ll category        */ CharacterIdentifierStart,
+/* 182 - So category        */ CharacterInvalid,
+/* 183 - Po category        */ CharacterInvalid,
+/* 184 - Sk category        */ CharacterInvalid,
+/* 185 - No category        */ CharacterInvalid,
+/* 186 - Ll category        */ CharacterIdentifierStart,
+/* 187 - Pf category        */ CharacterInvalid,
+/* 188 - No category        */ CharacterInvalid,
+/* 189 - No category        */ CharacterInvalid,
+/* 190 - No category        */ CharacterInvalid,
+/* 191 - Po category        */ CharacterInvalid,
+/* 192 - Lu category        */ CharacterIdentifierStart,
+/* 193 - Lu category        */ CharacterIdentifierStart,
+/* 194 - Lu category        */ CharacterIdentifierStart,
+/* 195 - Lu category        */ CharacterIdentifierStart,
+/* 196 - Lu category        */ CharacterIdentifierStart,
+/* 197 - Lu category        */ CharacterIdentifierStart,
+/* 198 - Lu category        */ CharacterIdentifierStart,
+/* 199 - Lu category        */ CharacterIdentifierStart,
+/* 200 - Lu category        */ CharacterIdentifierStart,
+/* 201 - Lu category        */ CharacterIdentifierStart,
+/* 202 - Lu category        */ CharacterIdentifierStart,
+/* 203 - Lu category        */ CharacterIdentifierStart,
+/* 204 - Lu category        */ CharacterIdentifierStart,
+/* 205 - Lu category        */ CharacterIdentifierStart,
+/* 206 - Lu category        */ CharacterIdentifierStart,
+/* 207 - Lu category        */ CharacterIdentifierStart,
+/* 208 - Lu category        */ CharacterIdentifierStart,
+/* 209 - Lu category        */ CharacterIdentifierStart,
+/* 210 - Lu category        */ CharacterIdentifierStart,
+/* 211 - Lu category        */ CharacterIdentifierStart,
+/* 212 - Lu category        */ CharacterIdentifierStart,
+/* 213 - Lu category        */ CharacterIdentifierStart,
+/* 214 - Lu category        */ CharacterIdentifierStart,
+/* 215 - Sm category        */ CharacterInvalid,
+/* 216 - Lu category        */ CharacterIdentifierStart,
+/* 217 - Lu category        */ CharacterIdentifierStart,
+/* 218 - Lu category        */ CharacterIdentifierStart,
+/* 219 - Lu category        */ CharacterIdentifierStart,
+/* 220 - Lu category        */ CharacterIdentifierStart,
+/* 221 - Lu category        */ CharacterIdentifierStart,
+/* 222 - Lu category        */ CharacterIdentifierStart,
+/* 223 - Ll category        */ CharacterIdentifierStart,
+/* 224 - Ll category        */ CharacterIdentifierStart,
+/* 225 - Ll category        */ CharacterIdentifierStart,
+/* 226 - Ll category        */ CharacterIdentifierStart,
+/* 227 - Ll category        */ CharacterIdentifierStart,
+/* 228 - Ll category        */ CharacterIdentifierStart,
+/* 229 - Ll category        */ CharacterIdentifierStart,
+/* 230 - Ll category        */ CharacterIdentifierStart,
+/* 231 - Ll category        */ CharacterIdentifierStart,
+/* 232 - Ll category        */ CharacterIdentifierStart,
+/* 233 - Ll category        */ CharacterIdentifierStart,
+/* 234 - Ll category        */ CharacterIdentifierStart,
+/* 235 - Ll category        */ CharacterIdentifierStart,
+/* 236 - Ll category        */ CharacterIdentifierStart,
+/* 237 - Ll category        */ CharacterIdentifierStart,
+/* 238 - Ll category        */ CharacterIdentifierStart,
+/* 239 - Ll category        */ CharacterIdentifierStart,
+/* 240 - Ll category        */ CharacterIdentifierStart,
+/* 241 - Ll category        */ CharacterIdentifierStart,
+/* 242 - Ll category        */ CharacterIdentifierStart,
+/* 243 - Ll category        */ CharacterIdentifierStart,
+/* 244 - Ll category        */ CharacterIdentifierStart,
+/* 245 - Ll category        */ CharacterIdentifierStart,
+/* 246 - Ll category        */ CharacterIdentifierStart,
+/* 247 - Sm category        */ CharacterInvalid,
+/* 248 - Ll category        */ CharacterIdentifierStart,
+/* 249 - Ll category        */ CharacterIdentifierStart,
+/* 250 - Ll category        */ CharacterIdentifierStart,
+/* 251 - Ll category        */ CharacterIdentifierStart,
+/* 252 - Ll category        */ CharacterIdentifierStart,
+/* 253 - Ll category        */ CharacterIdentifierStart,
+/* 254 - Ll category        */ CharacterIdentifierStart,
+/* 255 - Ll category        */ CharacterIdentifierStart
 };
 
 template <typename T>
@@ -350,7 +478,7 @@ int Lexer<T>::getUnicodeCharacter()
 template <typename T>
 void Lexer<T>::shiftLineTerminator()
 {
-    ASSERT(isLineTerminator(m_current));
+    ASSERT(isLineTerminator(static_cast<T>(m_current)));
 
     int m_prev = m_current;
     shift();
@@ -368,28 +496,48 @@ ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
 }
 
-static NEVER_INLINE bool isNonASCIIIdentStart(int c)
+static NEVER_INLINE bool isNonLatin1IdentStart(int c)
 {
     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
 }
 
-static inline bool isIdentStart(int c)
+static ALWAYS_INLINE bool isLatin1(LChar)
 {
-    return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
+    return true;
+}
+
+static ALWAYS_INLINE bool isLatin1(UChar c)
+{
+    return c < 256;
 }
 
-static NEVER_INLINE bool isNonASCIIIdentPart(int c)
+static inline bool isIdentStart(LChar c)
+{
+    return typesOfLatin1Characters[c] == CharacterIdentifierStart;
+}
+
+static inline bool isIdentStart(UChar c)
+{
+    return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
+}
+
+static NEVER_INLINE bool isNonLatin1IdentPart(int c)
 {
     return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) || c == 0x200C || c == 0x200D;
 }
 
-static ALWAYS_INLINE bool isIdentPart(int c)
+static ALWAYS_INLINE bool isIdentPart(LChar c)
 {
     // Character types are divided into two groups depending on whether they can be part of an
     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
     // part of an identifier. (See the CharacterType definition for more details.)
-    return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
+    return typesOfLatin1Characters[c] <= CharacterNumber;
+}
+
+static ALWAYS_INLINE bool isIdentPart(UChar c)
+{
+    return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
 }
 
 static inline int singleEscape(int c)
@@ -499,7 +647,7 @@ template <>
 
     const LChar* identifierStart = currentCharacter();
     
-    while (isIdentPart(m_current))
+    while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
         shift();
     
     if (UNLIKELY(m_current == '\\')) {
@@ -550,7 +698,7 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
 
     UChar orAllChars = 0;
     
-    while (isIdentPart(m_current)) {
+    while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
         orAllChars |= m_current;
         shift();
     }
@@ -604,7 +752,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
     bool bufferRequired = false;
 
     while (true) {
-        if (LIKELY(isIdentPart(m_current))) {
+        if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
             shift();
             continue;
         }
@@ -622,10 +770,11 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
         int character = getUnicodeCharacter();
         if (UNLIKELY(character == -1))
             return ERRORTOK;
-        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
+        UChar ucharacter = static_cast<UChar>(character);
+        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
             return ERRORTOK;
         if (shouldCreateIdentifier)
-            record16(character);
+            record16(ucharacter);
         identifierStart = currentCharacter();
     }
 
@@ -692,14 +841,15 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
                 shiftLineTerminator();
             else if (m_current == 'x') {
                 shift();
-                if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
-                    int prev = m_current;
-                    shift();
-                    if (shouldBuildStrings)
-                        record8(convertHex(prev, m_current));
-                    shift();
-                } else if (shouldBuildStrings)
-                    record8('x');
+                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
+                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
+                    return false;
+                }
+                int prev = m_current;
+                shift();
+                if (shouldBuildStrings)
+                    record8(convertHex(prev, m_current));
+                shift();
             } else {
                 setOffset(startingOffset);
                 setLineNumber(startingLineNumber);
@@ -752,18 +902,19 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
                 if (shouldBuildStrings)
                     record16(escape);
                 shift();
-            } else if (UNLIKELY(isLineTerminator(m_current)))
+            } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
                 shiftLineTerminator();
             else if (m_current == 'x') {
                 shift();
-                if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
-                    int prev = m_current;
-                    shift();
-                    if (shouldBuildStrings)
-                        record16(convertHex(prev, m_current));
-                    shift();
-                } else if (shouldBuildStrings)
-                    record16('x');
+                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
+                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
+                    return false;
+                }
+                int prev = m_current;
+                shift();
+                if (shouldBuildStrings)
+                    record16(convertHex(prev, m_current));
+                shift();
             } else if (m_current == 'u') {
                 shift();
                 int character = getUnicodeCharacter();
@@ -824,7 +975,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
         // as possible, and lets through all common ASCII characters.
         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
             // New-line or end of input is not allowed
-            if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1)) {
+            if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
                 m_lexErrorMessage = "Unexpected EOF";
                 return false;
             }
@@ -1004,7 +1155,7 @@ ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
         if (UNLIKELY(m_current == -1))
             return false;
 
-        if (isLineTerminator(m_current)) {
+        if (isLineTerminator(static_cast<T>(m_current))) {
             shiftLineTerminator();
             m_terminator = true;
         } else
@@ -1033,7 +1184,7 @@ JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsign
     m_terminator = false;
 
 start:
-    while (isWhiteSpace(m_current))
+    while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
         shift();
 
     int startOffset = currentOffset();
@@ -1044,11 +1195,11 @@ start:
     m_delimited = false;
 
     CharacterType type;
-    if (LIKELY(isASCII(m_current)))
-        type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
-    else if (isNonASCIIIdentStart(m_current))
+    if (LIKELY(isLatin1(static_cast<T>(m_current))))
+        type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
+    else if (isNonLatin1IdentStart(m_current))
         type = CharacterIdentifierStart;
-    else if (isLineTerminator(m_current))
+    else if (isLineTerminator(static_cast<T>(m_current)))
         type = CharacterLineTerminator;
     else
         type = CharacterInvalid;
@@ -1335,7 +1486,7 @@ inNumberAfterDecimalPoint:
         }
 
         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
-        if (UNLIKELY(isIdentStart(m_current))) {
+        if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
             m_lexErrorMessage = "At least one digit must occur after a decimal point";
             goto returnError;
         }
@@ -1355,7 +1506,7 @@ inNumberAfterDecimalPoint:
         token = STRING;
         break;
     case CharacterIdentifierStart:
-        ASSERT(isIdentStart(m_current));
+        ASSERT(isIdentStart(static_cast<T>(m_current)));
         // Fall through into CharacterBackSlash.
     case CharacterBackSlash:
         if (lexerFlags & LexexFlagsDontBuildKeywords)
@@ -1364,7 +1515,7 @@ inNumberAfterDecimalPoint:
             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
         break;
     case CharacterLineTerminator:
-        ASSERT(isLineTerminator(m_current));
+        ASSERT(isLineTerminator(static_cast<T>(m_current)));
         shiftLineTerminator();
         m_atLineStart = true;
         m_terminator = true;
@@ -1382,7 +1533,7 @@ inNumberAfterDecimalPoint:
     goto returnToken;
 
 inSingleLineComment:
-    while (!isLineTerminator(m_current)) {
+    while (!isLineTerminator(static_cast<T>(m_current))) {
         if (UNLIKELY(m_current == -1))
             return EOFTOK;
         shift();
@@ -1430,7 +1581,7 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
     while (true) {
         int current = m_current;
 
-        if (isLineTerminator(current) || current == -1) {
+        if (isLineTerminator(static_cast<T>(current)) || current == -1) {
             m_buffer16.resize(0);
             return false;
         }
@@ -1463,7 +1614,7 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
     m_buffer16.resize(0);
 
-    while (isIdentPart(m_current)) {
+    while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
         record16(m_current);
         shift();
     }
@@ -1483,7 +1634,7 @@ bool Lexer<T>::skipRegExp()
     while (true) {
         int current = m_current;
 
-        if (isLineTerminator(current) || current == -1)
+        if (isLineTerminator(static_cast<T>(current)) || current == -1)
             return false;
 
         shift();
@@ -1509,7 +1660,7 @@ bool Lexer<T>::skipRegExp()
         }
     }
 
-    while (isIdentPart(m_current))
+    while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
         shift();
 
     return true;
author	Simon Hausmann <simon.hausmann@nokia.com>	2012-03-12 14:11:15 +0100
committer	Simon Hausmann <simon.hausmann@nokia.com>	2012-03-12 14:11:15 +0100
commit	dd91e772430dc294e3bf478c119ef8d43c0a3358 (patch)
tree	6f33ce4d5872a5691e0291eb45bf6ab373a5f567 /Source/JavaScriptCore/parser/Lexer.cpp
parent	ad0d549d4cc13433f77c1ac8f0ab379c83d93f28 (diff)
download	qtwebkit-dd91e772430dc294e3bf478c119ef8d43c0a3358.tar.gz