summary | refs | log | tree | commit | diff
path: root/Source/JavaScriptCore/parser/Lexer.cpp
diff options
context:
space:
mode:
author: Simon Hausmann <simon.hausmann@nokia.com> 2012-05-07 11:21:11 +0200
committer: Simon Hausmann <simon.hausmann@nokia.com> 2012-05-07 11:21:11 +0200
commit: 2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47 (patch)
tree: 988e8c5b116dd0466244ae2fe5af8ee9be926d76 /Source/JavaScriptCore/parser/Lexer.cpp
parent: dd91e772430dc294e3bf478c119ef8d43c0a3358 (diff)
download: qtwebkit-2cf6c8816a73e0132bd8fa3b509d62d7c51b6e47.tar.gz
Imported WebKit commit 7e538425aa020340619e927792f3d895061fb54b (http://svn.webkit.org/repository/webkit/trunk@116286)
Diffstat (limited to 'Source/JavaScriptCore/parser/Lexer.cpp')
-rw-r--r-- Source/JavaScriptCore/parser/Lexer.cpp 168
1 files changed, 84 insertions, 84 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.cpp b/Source/JavaScriptCore/parser/Lexer.cpp
index 8d50afc54..3b020f4f2 100644
--- a/Source/JavaScriptCore/parser/Lexer.cpp
+++ b/Source/JavaScriptCore/parser/Lexer.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved.
* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
@@ -368,7 +368,7 @@ Lexer<T>::~Lexer()
}
template <typename T>
-UString Lexer<T>::getInvalidCharMessage()
+UString Lexer<T>::invalidCharacterMessage() const
{
switch (m_current) {
case 0:
@@ -386,7 +386,7 @@ UString Lexer<T>::getInvalidCharMessage()
case 96:
return "Invalid character: '`'";
default:
- return String::format("Invalid character '\\u%04u'", m_current).impl();
+ return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
}
}
@@ -403,7 +403,6 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
m_arena = &arena->identifierArena();
m_lineNumber = source.firstLine();
- m_delimited = false;
m_lastToken = -1;
const StringImpl* sourceString = source.provider()->data();
@@ -426,7 +425,7 @@ void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
if (LIKELY(m_code < m_codeEnd))
m_current = *m_code;
else
- m_current = -1;
+ m_current = 0;
ASSERT(currentOffset() == source.startOffset());
}
@@ -440,29 +439,34 @@ template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
template <typename T>
ALWAYS_INLINE void Lexer<T>::shift()
{
- // Faster than an if-else sequence
- ASSERT(m_current != -1);
- m_current = -1;
- m_code++;
+ // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
+ m_current = 0;
+ ++m_code;
if (LIKELY(m_code < m_codeEnd))
m_current = *m_code;
}
template <typename T>
-ALWAYS_INLINE int Lexer<T>::peek(int offset)
+ALWAYS_INLINE bool Lexer<T>::atEnd() const
+{
+ ASSERT(!m_current || m_code < m_codeEnd);
+ return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
+}
+
+template <typename T>
+ALWAYS_INLINE T Lexer<T>::peek(int offset) const
{
- // Only use if necessary
ASSERT(offset > 0 && offset < 5);
const T* code = m_code + offset;
- return (code < m_codeEnd) ? *code : -1;
+ return (code < m_codeEnd) ? *code : 0;
}
template <typename T>
-int Lexer<T>::getUnicodeCharacter()
+int Lexer<T>::parseFourDigitUnicodeHex()
{
- int char1 = peek(1);
- int char2 = peek(2);
- int char3 = peek(3);
+ T char1 = peek(1);
+ T char2 = peek(2);
+ T char3 = peek(3);
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
return -1;
@@ -478,13 +482,13 @@ int Lexer<T>::getUnicodeCharacter()
template <typename T>
void Lexer<T>::shiftLineTerminator()
{
- ASSERT(isLineTerminator(static_cast<T>(m_current)));
+ ASSERT(isLineTerminator(m_current));
- int m_prev = m_current;
+ T prev = m_current;
shift();
// Allow both CRLF and LFCR.
- if (m_prev + m_current == '\n' + '\r')
+ if (prev + m_current == '\n' + '\r')
shift();
++m_lineNumber;
@@ -629,12 +633,12 @@ template <typename T>
inline void Lexer<T>::record16(int c)
{
ASSERT(c >= 0);
- ASSERT(c <= USHRT_MAX);
+ ASSERT(c <= static_cast<int>(USHRT_MAX));
m_buffer16.append(static_cast<UChar>(c));
}
template <>
- template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
const ptrdiff_t remaining = m_codeEnd - m_code;
if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
@@ -647,7 +651,7 @@ template <>
const LChar* identifierStart = currentCharacter();
- while (m_current != -1 && isIdentPart(static_cast<LChar>(m_current)))
+ while (isIdentPart(m_current))
shift();
if (UNLIKELY(m_current == '\\')) {
@@ -665,8 +669,6 @@ template <>
} else
tokenData->ident = 0;
- m_delimited = false;
-
if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
@@ -694,11 +696,12 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
}
}
+
const UChar* identifierStart = currentCharacter();
UChar orAllChars = 0;
- while (m_current != -1 && isIdentPart(static_cast<UChar>(m_current))) {
+ while (isIdentPart(m_current)) {
orAllChars |= m_current;
shift();
}
@@ -726,8 +729,6 @@ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
} else
tokenData->ident = 0;
- m_delimited = false;
-
if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
@@ -752,7 +753,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
bool bufferRequired = false;
while (true) {
- if (LIKELY(m_current != -1 && isIdentPart(static_cast<T>(m_current)))) {
+ if (LIKELY(isIdentPart(m_current))) {
shift();
continue;
}
@@ -767,7 +768,7 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
if (UNLIKELY(m_current != 'u'))
return ERRORTOK;
shift();
- int character = getUnicodeCharacter();
+ int character = parseFourDigitUnicodeHex();
if (UNLIKELY(character == -1))
return ERRORTOK;
UChar ucharacter = static_cast<UChar>(character);
@@ -794,8 +795,6 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
} else
tokenData->ident = 0;
- m_delimited = false;
-
if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
ASSERT(shouldCreateIdentifier);
// Keywords must not be recognized if there was an \uXXXX in the identifier.
@@ -814,18 +813,28 @@ template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlow
return IDENT;
}
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
+{
+ return character < 0xE;
+}
+
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
+{
+ return character < 0xE || character > 0xFF;
+}
+
template <typename T>
template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
{
int startingOffset = currentOffset();
int startingLineNumber = lineNumber();
- int stringQuoteCharacter = m_current;
+ T stringQuoteCharacter = m_current;
shift();
const T* stringStart = currentCharacter();
while (m_current != stringQuoteCharacter) {
- if (UNLIKELY((m_current == '\\'))) {
+ if (UNLIKELY(m_current == '\\')) {
if (stringStart != currentCharacter() && shouldBuildStrings)
append8(stringStart, currentCharacter() - stringStart);
shift();
@@ -845,7 +854,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
return false;
}
- int prev = m_current;
+ T prev = m_current;
shift();
if (shouldBuildStrings)
record8(convertHex(prev, m_current));
@@ -860,7 +869,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
continue;
}
- if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) {
+ if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
setOffset(startingOffset);
setLineNumber(startingLineNumber);
m_buffer8.resize(0);
@@ -884,7 +893,7 @@ template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTo
template <typename T>
template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
{
- int stringQuoteCharacter = m_current;
+ T stringQuoteCharacter = m_current;
shift();
const T* stringStart = currentCharacter();
@@ -902,7 +911,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
if (shouldBuildStrings)
record16(escape);
shift();
- } else if (UNLIKELY(isLineTerminator(static_cast<T>(m_current))))
+ } else if (UNLIKELY(isLineTerminator(m_current)))
shiftLineTerminator();
else if (m_current == 'x') {
shift();
@@ -910,14 +919,14 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
return false;
}
- int prev = m_current;
+ T prev = m_current;
shift();
if (shouldBuildStrings)
record16(convertHex(prev, m_current));
shift();
} else if (m_current == 'u') {
shift();
- int character = getUnicodeCharacter();
+ int character = parseFourDigitUnicodeHex();
if (character != -1) {
if (shouldBuildStrings)
record16(character);
@@ -940,11 +949,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
record16(0);
} else if (!strictMode && isASCIIOctalDigit(m_current)) {
// Octal character sequences
- int character1 = m_current;
+ T character1 = m_current;
shift();
if (isASCIIOctalDigit(m_current)) {
// Two octal characters
- int character2 = m_current;
+ T character2 = m_current;
shift();
if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
if (shouldBuildStrings)
@@ -958,7 +967,7 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
if (shouldBuildStrings)
record16(character1 - '0');
}
- } else if (m_current != -1) {
+ } else if (!atEnd()) {
if (shouldBuildStrings)
record16(m_current);
shift();
@@ -971,11 +980,11 @@ template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenDat
continue;
}
// Fast check for characters that require special handling.
- // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
+ // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
// as possible, and lets through all common ASCII characters.
if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
// New-line or end of input is not allowed
- if (UNLIKELY(m_current == -1) || UNLIKELY(isLineTerminator(static_cast<T>(m_current)))) {
+ if (atEnd() || isLineTerminator(m_current)) {
m_lexErrorMessage = "Unexpected EOF";
return false;
}
@@ -1152,10 +1161,10 @@ ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
}
}
- if (UNLIKELY(m_current == -1))
+ if (atEnd())
return false;
- if (isLineTerminator(static_cast<T>(m_current))) {
+ if (isLineTerminator(m_current)) {
shiftLineTerminator();
m_terminator = true;
} else
@@ -1184,22 +1193,20 @@ JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsign
m_terminator = false;
start:
- while (m_current != -1 && isWhiteSpace(static_cast<T>(m_current)))
+ while (isWhiteSpace(m_current))
shift();
- int startOffset = currentOffset();
-
- if (UNLIKELY(m_current == -1))
+ if (atEnd())
return EOFTOK;
-
- m_delimited = false;
+
+ tokenInfo->startOffset = currentOffset();
CharacterType type;
- if (LIKELY(isLatin1(static_cast<T>(m_current))))
+ if (LIKELY(isLatin1(m_current)))
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
else if (isNonLatin1IdentStart(m_current))
type = CharacterIdentifierStart;
- else if (isLineTerminator(static_cast<T>(m_current)))
+ else if (isLineTerminator(m_current))
type = CharacterLineTerminator;
else
type = CharacterInvalid;
@@ -1425,7 +1432,6 @@ start:
shift();
break;
case CharacterSemicolon:
- m_delimited = true;
shift();
token = SEMICOLON;
break;
@@ -1436,7 +1442,6 @@ start:
break;
case CharacterCloseBrace:
tokenData->intValue = currentOffset();
- m_delimited = true;
shift();
token = CLOSEBRACE;
break;
@@ -1473,25 +1478,24 @@ start:
inNumberAfterDecimalPoint:
parseNumberAfterDecimalPoint();
}
- if ((m_current | 0x20) == 'e')
+ if ((m_current | 0x20) == 'e') {
if (!parseNumberAfterExponentIndicator()) {
m_lexErrorMessage = "Non-number found after exponent indicator";
goto returnError;
}
- // Null-terminate string for strtod.
- m_buffer8.append('\0');
- tokenData->doubleValue = WTF::strtod<WTF::AllowTrailingJunk>(reinterpret_cast<const char*>(m_buffer8.data()), 0);
+ }
+ size_t parsedLength;
+ tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
}
token = NUMBER;
}
// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
- if (UNLIKELY(m_current != -1 && isIdentStart(static_cast<T>(m_current)))) {
+ if (UNLIKELY(isIdentStart(m_current))) {
m_lexErrorMessage = "At least one digit must occur after a decimal point";
goto returnError;
}
m_buffer8.resize(0);
- m_delimited = false;
break;
case CharacterQuote:
if (lexerFlags & LexerFlagsDontBuildStrings) {
@@ -1502,11 +1506,10 @@ inNumberAfterDecimalPoint:
goto returnError;
}
shift();
- m_delimited = false;
token = STRING;
break;
case CharacterIdentifierStart:
- ASSERT(isIdentStart(static_cast<T>(m_current)));
+ ASSERT(isIdentStart(m_current));
// Fall through into CharacterBackSlash.
case CharacterBackSlash:
if (lexerFlags & LexexFlagsDontBuildKeywords)
@@ -1515,13 +1518,13 @@ inNumberAfterDecimalPoint:
token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
break;
case CharacterLineTerminator:
- ASSERT(isLineTerminator(static_cast<T>(m_current)));
+ ASSERT(isLineTerminator(m_current));
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
goto start;
case CharacterInvalid:
- m_lexErrorMessage = getInvalidCharMessage();
+ m_lexErrorMessage = invalidCharacterMessage();
goto returnError;
default:
ASSERT_NOT_REACHED();
@@ -1533,8 +1536,8 @@ inNumberAfterDecimalPoint:
goto returnToken;
inSingleLineComment:
- while (!isLineTerminator(static_cast<T>(m_current))) {
- if (UNLIKELY(m_current == -1))
+ while (!isLineTerminator(m_current)) {
+ if (atEnd())
return EOFTOK;
shift();
}
@@ -1545,12 +1548,10 @@ inSingleLineComment:
goto start;
token = SEMICOLON;
- m_delimited = true;
// Fall through into returnToken.
returnToken:
tokenInfo->line = m_lineNumber;
- tokenInfo->startOffset = startOffset;
tokenInfo->endOffset = currentOffset();
m_lastToken = token;
return token;
@@ -1558,7 +1559,6 @@ returnToken:
returnError:
m_error = true;
tokenInfo->line = m_lineNumber;
- tokenInfo->startOffset = startOffset;
tokenInfo->endOffset = currentOffset();
return ERRORTOK;
}
@@ -1579,26 +1579,26 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
}
while (true) {
- int current = m_current;
-
- if (isLineTerminator(static_cast<T>(current)) || current == -1) {
+ if (isLineTerminator(m_current) || atEnd()) {
m_buffer16.resize(0);
return false;
}
+ T prev = m_current;
+
shift();
- if (current == '/' && !lastWasEscape && !inBrackets)
+ if (prev == '/' && !lastWasEscape && !inBrackets)
break;
- record16(current);
+ record16(prev);
if (lastWasEscape) {
lastWasEscape = false;
continue;
}
- switch (current) {
+ switch (prev) {
case '[':
inBrackets = true;
break;
@@ -1614,7 +1614,7 @@ bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags,
pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
m_buffer16.resize(0);
- while (m_current != -1 && isIdentPart(static_cast<T>(m_current))) {
+ while (isIdentPart(m_current)) {
record16(m_current);
shift();
}
@@ -1632,14 +1632,14 @@ bool Lexer<T>::skipRegExp()
bool inBrackets = false;
while (true) {
- int current = m_current;
-
- if (isLineTerminator(static_cast<T>(current)) || current == -1)
+ if (isLineTerminator(m_current) || atEnd())
return false;
+ T prev = m_current;
+
shift();
- if (current == '/' && !lastWasEscape && !inBrackets)
+ if (prev == '/' && !lastWasEscape && !inBrackets)
break;
if (lastWasEscape) {
@@ -1647,7 +1647,7 @@ bool Lexer<T>::skipRegExp()
continue;
}
- switch (current) {
+ switch (prev) {
case '[':
inBrackets = true;
break;
@@ -1660,7 +1660,7 @@ bool Lexer<T>::skipRegExp()
}
}
- while (m_current != -1 && isIdentPart(static_cast<T>(m_current)))
+ while (isIdentPart(m_current))
shift();
return true;