summaryrefslogtreecommitdiff
path: root/Source/WebCore/html/parser/HTMLTokenizer.cpp
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2016-04-10 09:28:39 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2016-04-10 09:28:39 +0000
commit32761a6cee1d0dee366b885b7b9c777e67885688 (patch)
treed6bec92bebfb216f4126356e55518842c2f476a1 /Source/WebCore/html/parser/HTMLTokenizer.cpp
parenta4e969f4965059196ca948db781e52f7cfebf19e (diff)
downloadWebKitGtk-tarball-32761a6cee1d0dee366b885b7b9c777e67885688.tar.gz
webkitgtk-2.4.11webkitgtk-2.4.11
Diffstat (limited to 'Source/WebCore/html/parser/HTMLTokenizer.cpp')
-rw-r--r--Source/WebCore/html/parser/HTMLTokenizer.cpp2233
1 files changed, 1199 insertions, 1034 deletions
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.cpp b/Source/WebCore/html/parser/HTMLTokenizer.cpp
index 489e6c51a..2abefaf68 100644
--- a/Source/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/Source/WebCore/html/parser/HTMLTokenizer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008, 2015 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
@@ -29,9 +29,13 @@
#include "HTMLTokenizer.h"
#include "HTMLEntityParser.h"
-#include "HTMLNames.h"
+#include "HTMLTreeBuilder.h"
#include "MarkupTokenizerInlines.h"
+#include "NotImplemented.h"
#include <wtf/ASCIICType.h>
+#include <wtf/CurrentTime.h>
+#include <wtf/text/CString.h>
+#include <wtf/unicode/Unicode.h>
using namespace WTF;
@@ -39,95 +43,81 @@ namespace WebCore {
using namespace HTMLNames;
-static inline LChar convertASCIIAlphaToLower(UChar character)
+// This has to go in a .cpp file, as the linker doesn't like it being included more than once.
+// We don't have an HTMLToken.cpp though, so this is the next best place.
+QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const
{
- ASSERT(isASCIIAlpha(character));
- return toASCIILowerUnchecked(character);
+ return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom);
}
-static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const char* string)
+bool AtomicHTMLToken::usesName() const
{
- unsigned size = vector.size();
- for (unsigned i = 0; i < size; ++i) {
- if (!string[i] || vector[i] != string[i])
- return false;
- }
- return !string[size];
+ return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
}
-inline bool HTMLTokenizer::inEndTagBufferingState() const
+bool AtomicHTMLToken::usesAttributes() const
{
- switch (m_state) {
- case RCDATAEndTagOpenState:
- case RCDATAEndTagNameState:
- case RAWTEXTEndTagOpenState:
- case RAWTEXTEndTagNameState:
- case ScriptDataEndTagOpenState:
- case ScriptDataEndTagNameState:
- case ScriptDataEscapedEndTagOpenState:
- case ScriptDataEscapedEndTagNameState:
- return true;
- default:
- return false;
- }
+ return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
}
-HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
- : m_preprocessor(*this)
- , m_options(options)
+static inline UChar toLowerCase(UChar cc)
{
+ ASSERT(isASCIIUpper(cc));
+ const int lowerCaseOffset = 0x20;
+ return cc + lowerCaseOffset;
}
-inline void HTMLTokenizer::bufferASCIICharacter(UChar character)
+static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
{
- ASSERT(character != kEndOfFileMarker);
- ASSERT(isASCII(character));
- LChar narrowedCharacter = character;
- m_token.appendToCharacter(narrowedCharacter);
-}
+ if (vector.size() != string.length())
+ return false;
-inline void HTMLTokenizer::bufferCharacter(UChar character)
-{
- ASSERT(character != kEndOfFileMarker);
- m_token.appendToCharacter(character);
-}
+ if (!string.length())
+ return true;
-inline bool HTMLTokenizer::emitAndResumeInDataState(SegmentedString& source)
-{
- saveEndTagNameIfNeeded();
- m_state = DataState;
- source.advanceAndUpdateLineNumber();
- return true;
+ return equal(string.impl(), vector.data(), vector.size());
}
-inline bool HTMLTokenizer::emitAndReconsumeInDataState()
+static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
{
- saveEndTagNameIfNeeded();
- m_state = DataState;
- return true;
+ switch (state) {
+ case HTMLTokenizer::RCDATAEndTagOpenState:
+ case HTMLTokenizer::RCDATAEndTagNameState:
+ case HTMLTokenizer::RAWTEXTEndTagOpenState:
+ case HTMLTokenizer::RAWTEXTEndTagNameState:
+ case HTMLTokenizer::ScriptDataEndTagOpenState:
+ case HTMLTokenizer::ScriptDataEndTagNameState:
+ case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
+ case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
+ return true;
+ default:
+ return false;
+ }
}
-inline bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
+#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
+#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
+#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
+#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
+
+HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
+ : m_inputStreamPreprocessor(this)
+ , m_options(options)
{
- m_state = DataState;
- if (haveBufferedCharacterToken())
- return true;
- source.advance();
- m_token.clear();
- m_token.makeEndOfFile();
- return true;
+ reset();
}
-inline void HTMLTokenizer::saveEndTagNameIfNeeded()
+HTMLTokenizer::~HTMLTokenizer()
{
- ASSERT(m_token.type() != HTMLToken::Uninitialized);
- if (m_token.type() == HTMLToken::StartTag)
- m_appropriateEndTagName = m_token.name();
}
-inline bool HTMLTokenizer::haveBufferedCharacterToken() const
+void HTMLTokenizer::reset()
{
- return m_token.type() == HTMLToken::Character;
+ m_state = HTMLTokenizer::DataState;
+ m_token = 0;
+ m_forceNullCharacterReplacement = false;
+ m_shouldAllowCDATA = false;
+ m_additionalAllowedCharacter = '\0';
}
inline bool HTMLTokenizer::processEntity(SegmentedString& source)
@@ -139,7 +129,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
return false;
if (!success) {
ASSERT(decodedEntity.isEmpty());
- bufferASCIICharacter('&');
+ bufferCharacter('&');
} else {
for (unsigned i = 0; i < decodedEntity.length(); ++i)
bufferCharacter(decodedEntity[i]);
@@ -147,1246 +137,1426 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
return true;
}
-void HTMLTokenizer::flushBufferedEndTag()
+bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
{
- m_token.beginEndTag(m_bufferedEndTagName);
+ ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
+ source.advanceAndUpdateLineNumber();
+ if (m_token->type() == HTMLToken::Character)
+ return true;
+ m_token->beginEndTag(m_bufferedEndTagName);
m_bufferedEndTagName.clear();
m_appropriateEndTagName.clear();
m_temporaryBuffer.clear();
-}
-
-bool HTMLTokenizer::commitToPartialEndTag(SegmentedString& source, UChar character, State state)
-{
- ASSERT(source.currentChar() == character);
- appendToTemporaryBuffer(character);
- source.advanceAndUpdateLineNumber();
-
- if (haveBufferedCharacterToken()) {
- // Emit the buffered character token.
- // The next call to processToken will flush the buffered end tag and continue parsing it.
- m_state = state;
- return true;
- }
-
- flushBufferedEndTag();
return false;
}
-bool HTMLTokenizer::commitToCompleteEndTag(SegmentedString& source)
+#define FLUSH_AND_ADVANCE_TO(stateName) \
+ do { \
+ m_state = HTMLTokenizer::stateName; \
+ if (flushBufferedEndTag(source)) \
+ return true; \
+ if (source.isEmpty() \
+ || !m_inputStreamPreprocessor.peek(source)) \
+ return haveBufferedCharacterToken(); \
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
+
+bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state)
{
- ASSERT(source.currentChar() == '>');
- appendToTemporaryBuffer('>');
- source.advance();
-
- m_state = DataState;
-
- if (haveBufferedCharacterToken()) {
- // Emit the character token we already have.
- // The next call to processToken will flush the buffered end tag and emit it.
- return true;
- }
-
- flushBufferedEndTag();
+ m_state = state;
+ flushBufferedEndTag(source);
return true;
}
-bool HTMLTokenizer::processToken(SegmentedString& source)
+bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
{
- if (!m_bufferedEndTagName.isEmpty() && !inEndTagBufferingState()) {
- // We are back here after emitting a character token that came just before an end tag.
- // To continue parsing the end tag we need to move the buffered tag name into the token.
- flushBufferedEndTag();
-
- // If we are in the data state, the end tag is already complete and we should emit it
- // now, otherwise, we want to resume parsing the partial end tag.
- if (m_state == DataState)
+ // If we have a token in progress, then we're supposed to be called back
+ // with the same token so we can finish it.
+ ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
+ m_token = &token;
+
+ if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
+ // FIXME: This should call flushBufferedEndTag().
+ // We started an end tag during our last iteration.
+ m_token->beginEndTag(m_bufferedEndTagName);
+ m_bufferedEndTagName.clear();
+ m_appropriateEndTagName.clear();
+ m_temporaryBuffer.clear();
+ if (m_state == HTMLTokenizer::DataState) {
+ // We're back in the data state, so we must be done with the tag.
return true;
+ }
}
- if (!m_preprocessor.peek(source, isNullCharacterSkippingState(m_state)))
+ if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
return haveBufferedCharacterToken();
- UChar character = m_preprocessor.nextInputCharacter();
+ UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
- // https://html.spec.whatwg.org/#tokenization
+ // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
switch (m_state) {
-
- BEGIN_STATE(DataState)
- if (character == '&')
- ADVANCE_TO(CharacterReferenceInDataState);
- if (character == '<') {
- if (haveBufferedCharacterToken())
- RETURN_IN_CURRENT_STATE(true);
- ADVANCE_TO(TagOpenState);
- }
- if (character == kEndOfFileMarker)
+ HTML_BEGIN_STATE(DataState) {
+ if (cc == '&')
+ HTML_ADVANCE_TO(CharacterReferenceInDataState);
+ else if (cc == '<') {
+ if (m_token->type() == HTMLToken::Character) {
+ // We have a bunch of character tokens queued up that we
+ // are emitting lazily here.
+ return true;
+ }
+ HTML_ADVANCE_TO(TagOpenState);
+ } else if (cc == kEndOfFileMarker)
return emitEndOfFile(source);
- bufferCharacter(character);
- ADVANCE_TO(DataState);
+ else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(DataState);
+ }
+ }
END_STATE()
- BEGIN_STATE(CharacterReferenceInDataState)
+ HTML_BEGIN_STATE(CharacterReferenceInDataState) {
if (!processEntity(source))
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
- SWITCH_TO(DataState);
+ return haveBufferedCharacterToken();
+ HTML_SWITCH_TO(DataState);
+ }
END_STATE()
- BEGIN_STATE(RCDATAState)
- if (character == '&')
- ADVANCE_TO(CharacterReferenceInRCDATAState);
- if (character == '<')
- ADVANCE_TO(RCDATALessThanSignState);
- if (character == kEndOfFileMarker)
- RECONSUME_IN(DataState);
- bufferCharacter(character);
- ADVANCE_TO(RCDATAState);
+ HTML_BEGIN_STATE(RCDATAState) {
+ if (cc == '&')
+ HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
+ else if (cc == '<')
+ HTML_ADVANCE_TO(RCDATALessThanSignState);
+ else if (cc == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(RCDATAState);
+ }
+ }
END_STATE()
- BEGIN_STATE(CharacterReferenceInRCDATAState)
+ HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
if (!processEntity(source))
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
- SWITCH_TO(RCDATAState);
- END_STATE()
-
- BEGIN_STATE(RAWTEXTState)
- if (character == '<')
- ADVANCE_TO(RAWTEXTLessThanSignState);
- if (character == kEndOfFileMarker)
- RECONSUME_IN(DataState);
- bufferCharacter(character);
- ADVANCE_TO(RAWTEXTState);
- END_STATE()
-
- BEGIN_STATE(ScriptDataState)
- if (character == '<')
- ADVANCE_TO(ScriptDataLessThanSignState);
- if (character == kEndOfFileMarker)
- RECONSUME_IN(DataState);
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataState);
- END_STATE()
-
- BEGIN_STATE(PLAINTEXTState)
- if (character == kEndOfFileMarker)
- RECONSUME_IN(DataState);
- bufferCharacter(character);
- ADVANCE_TO(PLAINTEXTState);
- END_STATE()
-
- BEGIN_STATE(TagOpenState)
- if (character == '!')
- ADVANCE_TO(MarkupDeclarationOpenState);
- if (character == '/')
- ADVANCE_TO(EndTagOpenState);
- if (isASCIIAlpha(character)) {
- m_token.beginStartTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(TagNameState);
+ return haveBufferedCharacterToken();
+ HTML_SWITCH_TO(RCDATAState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(RAWTEXTState) {
+ if (cc == '<')
+ HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
+ else if (cc == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(RAWTEXTState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(ScriptDataState) {
+ if (cc == '<')
+ HTML_ADVANCE_TO(ScriptDataLessThanSignState);
+ else if (cc == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataState);
}
- if (character == '?') {
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(PLAINTEXTState) {
+ if (cc == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(PLAINTEXTState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(TagOpenState) {
+ if (cc == '!')
+ HTML_ADVANCE_TO(MarkupDeclarationOpenState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(EndTagOpenState);
+ else if (isASCIIUpper(cc)) {
+ m_token->beginStartTag(toLowerCase(cc));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginStartTag(cc);
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == '?') {
parseError();
// The spec consumes the current character before switching
// to the bogus comment state, but it's easier to implement
// if we reconsume the current character.
- RECONSUME_IN(BogusCommentState);
+ HTML_RECONSUME_IN(BogusCommentState);
+ } else {
+ parseError();
+ bufferCharacter('<');
+ HTML_RECONSUME_IN(DataState);
}
- parseError();
- bufferASCIICharacter('<');
- RECONSUME_IN(DataState);
+ }
END_STATE()
- BEGIN_STATE(EndTagOpenState)
- if (isASCIIAlpha(character)) {
- m_token.beginEndTag(convertASCIIAlphaToLower(character));
+ HTML_BEGIN_STATE(EndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
m_appropriateEndTagName.clear();
- ADVANCE_TO(TagNameState);
- }
- if (character == '>') {
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginEndTag(static_cast<LChar>(cc));
+ m_appropriateEndTagName.clear();
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == '>') {
parseError();
- ADVANCE_TO(DataState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_ADVANCE_TO(DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ bufferCharacter('<');
+ bufferCharacter('/');
+ HTML_RECONSUME_IN(DataState);
+ } else {
parseError();
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(BogusCommentState);
}
- parseError();
- RECONSUME_IN(BogusCommentState);
+ }
END_STATE()
- BEGIN_STATE(TagNameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeAttributeNameState);
- if (character == '/')
- ADVANCE_TO(SelfClosingStartTagState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (m_options.usePreHTML5ParserQuirks && character == '<')
- return emitAndReconsumeInDataState();
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
+ HTML_BEGIN_STATE(TagNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (m_options.usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->appendToName(toLowerCase(cc));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToName(cc);
+ HTML_ADVANCE_TO(TagNameState);
}
- m_token.appendToName(toASCIILower(character));
- ADVANCE_TO(TagNameState);
+ }
END_STATE()
- BEGIN_STATE(RCDATALessThanSignState)
- if (character == '/') {
+ HTML_BEGIN_STATE(RCDATALessThanSignState) {
+ if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- ADVANCE_TO(RCDATAEndTagOpenState);
+ HTML_ADVANCE_TO(RCDATAEndTagOpenState);
+ } else {
+ bufferCharacter('<');
+ HTML_RECONSUME_IN(RCDATAState);
}
- bufferASCIICharacter('<');
- RECONSUME_IN(RCDATAState);
+ }
END_STATE()
- BEGIN_STATE(RCDATAEndTagOpenState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(RCDATAEndTagNameState);
+ HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(RCDATAEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(RCDATAEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ HTML_RECONSUME_IN(RCDATAState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- RECONSUME_IN(RCDATAState);
+ }
END_STATE()
- BEGIN_STATE(RCDATAEndTagNameState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(RCDATAEndTagNameState);
- }
- if (isTokenizerWhitespace(character)) {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
- return true;
- SWITCH_TO(BeforeAttributeNameState);
- }
- } else if (character == '/') {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
- return true;
- SWITCH_TO(SelfClosingStartTagState);
+ HTML_BEGIN_STATE(RCDATAEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(RCDATAEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(RCDATAEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ }
+ } else if (cc == '/') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ }
+ } else if (cc == '>') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+ }
}
- } else if (character == '>') {
- if (isAppropriateEndTag())
- return commitToCompleteEndTag(source);
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ HTML_RECONSUME_IN(RCDATAState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- m_token.appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- RECONSUME_IN(RCDATAState);
+ }
END_STATE()
- BEGIN_STATE(RAWTEXTLessThanSignState)
- if (character == '/') {
+ HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
+ if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- ADVANCE_TO(RAWTEXTEndTagOpenState);
+ HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
+ } else {
+ bufferCharacter('<');
+ HTML_RECONSUME_IN(RAWTEXTState);
}
- bufferASCIICharacter('<');
- RECONSUME_IN(RAWTEXTState);
+ }
END_STATE()
- BEGIN_STATE(RAWTEXTEndTagOpenState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(RAWTEXTEndTagNameState);
+ HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ HTML_RECONSUME_IN(RAWTEXTState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- RECONSUME_IN(RAWTEXTState);
+ }
END_STATE()
- BEGIN_STATE(RAWTEXTEndTagNameState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(RAWTEXTEndTagNameState);
- }
- if (isTokenizerWhitespace(character)) {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
- return true;
- SWITCH_TO(BeforeAttributeNameState);
- }
- } else if (character == '/') {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
- return true;
- SWITCH_TO(SelfClosingStartTagState);
+ HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ }
+ } else if (cc == '/') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ }
+ } else if (cc == '>') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+ }
}
- } else if (character == '>') {
- if (isAppropriateEndTag())
- return commitToCompleteEndTag(source);
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ HTML_RECONSUME_IN(RAWTEXTState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- m_token.appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- RECONSUME_IN(RAWTEXTState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataLessThanSignState)
- if (character == '/') {
+ HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
+ if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- ADVANCE_TO(ScriptDataEndTagOpenState);
- }
- if (character == '!') {
- bufferASCIICharacter('<');
- bufferASCIICharacter('!');
- ADVANCE_TO(ScriptDataEscapeStartState);
+ HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
+ } else if (cc == '!') {
+ bufferCharacter('<');
+ bufferCharacter('!');
+ HTML_ADVANCE_TO(ScriptDataEscapeStartState);
+ } else {
+ bufferCharacter('<');
+ HTML_RECONSUME_IN(ScriptDataState);
}
- bufferASCIICharacter('<');
- RECONSUME_IN(ScriptDataState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEndTagOpenState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataEndTagNameState);
+ HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ HTML_RECONSUME_IN(ScriptDataState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- RECONSUME_IN(ScriptDataState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEndTagNameState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataEndTagNameState);
- }
- if (isTokenizerWhitespace(character)) {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
- return true;
- SWITCH_TO(BeforeAttributeNameState);
- }
- } else if (character == '/') {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
- return true;
- SWITCH_TO(SelfClosingStartTagState);
+ HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ }
+ } else if (cc == '/') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ }
+ } else if (cc == '>') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+ }
}
- } else if (character == '>') {
- if (isAppropriateEndTag())
- return commitToCompleteEndTag(source);
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ HTML_RECONSUME_IN(ScriptDataState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- m_token.appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- RECONSUME_IN(ScriptDataState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapeStartState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataEscapeStartDashState);
+ HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
} else
- RECONSUME_IN(ScriptDataState);
+ HTML_RECONSUME_IN(ScriptDataState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapeStartDashState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataEscapedDashDashState);
+ HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
} else
- RECONSUME_IN(ScriptDataState);
+ HTML_RECONSUME_IN(ScriptDataState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataEscapedDashState);
- }
- if (character == '<')
- ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataEscapedState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedDashState);
+ } else if (cc == '<')
+ HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedDashState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataEscapedDashDashState);
- }
- if (character == '<')
- ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
+ } else if (cc == '<')
+ HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedDashDashState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataEscapedDashDashState);
- }
- if (character == '<')
- ADVANCE_TO(ScriptDataEscapedLessThanSignState);
- if (character == '>') {
- bufferASCIICharacter('>');
- ADVANCE_TO(ScriptDataState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
+ } else if (cc == '<')
+ HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+ else if (cc == '>') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedLessThanSignState)
- if (character == '/') {
+ HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
+ if (cc == '/') {
m_temporaryBuffer.clear();
ASSERT(m_bufferedEndTagName.isEmpty());
- ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
- }
- if (isASCIIAlpha(character)) {
- bufferASCIICharacter('<');
- bufferASCIICharacter(character);
+ HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter('<');
+ bufferCharacter(cc);
+ m_temporaryBuffer.clear();
+ m_temporaryBuffer.append(toLowerCase(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter('<');
+ bufferCharacter(cc);
m_temporaryBuffer.clear();
- appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else {
+ bufferCharacter('<');
+ HTML_RECONSUME_IN(ScriptDataEscapedState);
}
- bufferASCIICharacter('<');
- RECONSUME_IN(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedEndTagOpenState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ HTML_RECONSUME_IN(ScriptDataEscapedState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- RECONSUME_IN(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataEscapedEndTagNameState)
- if (isASCIIAlpha(character)) {
- appendToTemporaryBuffer(character);
- appendToPossibleEndTag(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataEscapedEndTagNameState);
- }
- if (isTokenizerWhitespace(character)) {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, character, BeforeAttributeNameState))
- return true;
- SWITCH_TO(BeforeAttributeNameState);
- }
- } else if (character == '/') {
- if (isAppropriateEndTag()) {
- if (commitToPartialEndTag(source, '/', SelfClosingStartTagState))
- return true;
- SWITCH_TO(SelfClosingStartTagState);
+ HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ addToPossibleEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ }
+ } else if (cc == '/') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ }
+ } else if (cc == '>') {
+ if (isAppropriateEndTag()) {
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
+ }
}
- } else if (character == '>') {
- if (isAppropriateEndTag())
- return commitToCompleteEndTag(source);
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ m_temporaryBuffer.clear();
+ HTML_RECONSUME_IN(ScriptDataEscapedState);
}
- bufferASCIICharacter('<');
- bufferASCIICharacter('/');
- m_token.appendToCharacter(m_temporaryBuffer);
- m_bufferedEndTagName.clear();
- m_temporaryBuffer.clear();
- RECONSUME_IN(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapeStartState)
- if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
- bufferASCIICharacter(character);
- if (temporaryBufferIs("script"))
- ADVANCE_TO(ScriptDataDoubleEscapedState);
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
+ if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+ bufferCharacter(cc);
+ if (temporaryBufferIs(scriptTag.localName()))
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
else
- ADVANCE_TO(ScriptDataEscapedState);
- }
- if (isASCIIAlpha(character)) {
- bufferASCIICharacter(character);
- appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataDoubleEscapeStartState);
- }
- RECONSUME_IN(ScriptDataEscapedState);
+ HTML_ADVANCE_TO(ScriptDataEscapedState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(toLowerCase(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else
+ HTML_RECONSUME_IN(ScriptDataEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapedState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataDoubleEscapedDashState);
- }
- if (character == '<') {
- bufferASCIICharacter('<');
- ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapedDashState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
- }
- if (character == '<') {
- bufferASCIICharacter('<');
- ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapedDashDashState)
- if (character == '-') {
- bufferASCIICharacter('-');
- ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
- }
- if (character == '<') {
- bufferASCIICharacter('<');
- ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
- }
- if (character == '>') {
- bufferASCIICharacter('>');
- ADVANCE_TO(ScriptDataState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == '>') {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
}
- bufferCharacter(character);
- ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState)
- if (character == '/') {
- bufferASCIICharacter('/');
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
+ if (cc == '/') {
+ bufferCharacter(cc);
m_temporaryBuffer.clear();
- ADVANCE_TO(ScriptDataDoubleEscapeEndState);
- }
- RECONSUME_IN(ScriptDataDoubleEscapedState);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else
+ HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
+ }
END_STATE()
- BEGIN_STATE(ScriptDataDoubleEscapeEndState)
- if (isTokenizerWhitespace(character) || character == '/' || character == '>') {
- bufferASCIICharacter(character);
- if (temporaryBufferIs("script"))
- ADVANCE_TO(ScriptDataEscapedState);
+ HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
+ if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+ bufferCharacter(cc);
+ if (temporaryBufferIs(scriptTag.localName()))
+ HTML_ADVANCE_TO(ScriptDataEscapedState);
else
- ADVANCE_TO(ScriptDataDoubleEscapedState);
- }
- if (isASCIIAlpha(character)) {
- bufferASCIICharacter(character);
- appendToTemporaryBuffer(convertASCIIAlphaToLower(character));
- ADVANCE_TO(ScriptDataDoubleEscapeEndState);
- }
- RECONSUME_IN(ScriptDataDoubleEscapedState);
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(toLowerCase(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else
+ HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
+ }
END_STATE()
- BEGIN_STATE(BeforeAttributeNameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeAttributeNameState);
- if (character == '/')
- ADVANCE_TO(SelfClosingStartTagState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (m_options.usePreHTML5ParserQuirks && character == '<')
- return emitAndReconsumeInDataState();
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
- }
- if (character == '"' || character == '\'' || character == '<' || character == '=')
- parseError();
- m_token.beginAttribute(source.numberOfCharactersConsumed());
- m_token.appendToAttributeName(toASCIILower(character));
- ADVANCE_TO(AttributeNameState);
- END_STATE()
-
- BEGIN_STATE(AttributeNameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(AfterAttributeNameState);
- if (character == '/')
- ADVANCE_TO(SelfClosingStartTagState);
- if (character == '=')
- ADVANCE_TO(BeforeAttributeValueState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (m_options.usePreHTML5ParserQuirks && character == '<')
- return emitAndReconsumeInDataState();
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
- }
- if (character == '"' || character == '\'' || character == '<' || character == '=')
- parseError();
- m_token.appendToAttributeName(toASCIILower(character));
- ADVANCE_TO(AttributeNameState);
- END_STATE()
-
- BEGIN_STATE(AfterAttributeNameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(AfterAttributeNameState);
- if (character == '/')
- ADVANCE_TO(SelfClosingStartTagState);
- if (character == '=')
- ADVANCE_TO(BeforeAttributeValueState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (m_options.usePreHTML5ParserQuirks && character == '<')
- return emitAndReconsumeInDataState();
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
+ HTML_BEGIN_STATE(BeforeAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (m_options.usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(toLowerCase(cc));
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(cc);
+ HTML_ADVANCE_TO(AttributeNameState);
}
- if (character == '"' || character == '\'' || character == '<')
- parseError();
- m_token.beginAttribute(source.numberOfCharactersConsumed());
- m_token.appendToAttributeName(toASCIILower(character));
- ADVANCE_TO(AttributeNameState);
+ }
END_STATE()
- BEGIN_STATE(BeforeAttributeValueState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeAttributeValueState);
- if (character == '"')
- ADVANCE_TO(AttributeValueDoubleQuotedState);
- if (character == '&')
- RECONSUME_IN(AttributeValueUnquotedState);
- if (character == '\'')
- ADVANCE_TO(AttributeValueSingleQuotedState);
- if (character == '>') {
- parseError();
- return emitAndResumeInDataState(source);
+ HTML_BEGIN_STATE(AttributeNameState) {
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(AfterAttributeNameState);
+ } else if (cc == '/') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '=') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ } else if (cc == '>') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (m_options.usePreHTML5ParserQuirks && cc == '<') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else if (isASCIIUpper(cc)) {
+ m_token->appendToAttributeName(toLowerCase(cc));
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ m_token->appendToAttributeName(cc);
+ HTML_ADVANCE_TO(AttributeNameState);
}
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '=')
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (m_options.usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(toLowerCase(cc));
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<')
+ parseError();
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(cc);
+ HTML_ADVANCE_TO(AttributeNameState);
}
- if (character == '<' || character == '=' || character == '`')
- parseError();
- m_token.appendToAttributeValue(character);
- ADVANCE_TO(AttributeValueUnquotedState);
+ }
END_STATE()
- BEGIN_STATE(AttributeValueDoubleQuotedState)
- if (character == '"') {
- m_token.endAttribute(source.numberOfCharactersConsumed());
- ADVANCE_TO(AfterAttributeValueQuotedState);
+ HTML_BEGIN_STATE(BeforeAttributeValueState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '"') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+ HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
+ } else if (cc == '&') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+ HTML_RECONSUME_IN(AttributeValueUnquotedState);
+ } else if (cc == '\'') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+ HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeValue(cc);
+ HTML_ADVANCE_TO(AttributeValueUnquotedState);
}
- if (character == '&') {
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
+ if (cc == '"') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '"';
- ADVANCE_TO(CharacterReferenceInAttributeValueState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.endAttribute(source.numberOfCharactersConsumed());
- RECONSUME_IN(DataState);
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToAttributeValue(cc);
+ HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
}
- m_token.appendToAttributeValue(character);
- ADVANCE_TO(AttributeValueDoubleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(AttributeValueSingleQuotedState)
- if (character == '\'') {
- m_token.endAttribute(source.numberOfCharactersConsumed());
- ADVANCE_TO(AfterAttributeValueQuotedState);
- }
- if (character == '&') {
+ HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
+ if (cc == '\'') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '\'';
- ADVANCE_TO(CharacterReferenceInAttributeValueState);
- }
- if (character == kEndOfFileMarker) {
+ HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.endAttribute(source.numberOfCharactersConsumed());
- RECONSUME_IN(DataState);
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToAttributeValue(cc);
+ HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
}
- m_token.appendToAttributeValue(character);
- ADVANCE_TO(AttributeValueSingleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(AttributeValueUnquotedState)
- if (isTokenizerWhitespace(character)) {
- m_token.endAttribute(source.numberOfCharactersConsumed());
- ADVANCE_TO(BeforeAttributeNameState);
- }
- if (character == '&') {
+ HTML_BEGIN_STATE(AttributeValueUnquotedState) {
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '>';
- ADVANCE_TO(CharacterReferenceInAttributeValueState);
- }
- if (character == '>') {
- m_token.endAttribute(source.numberOfCharactersConsumed());
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
- parseError();
- m_token.endAttribute(source.numberOfCharactersConsumed());
- RECONSUME_IN(DataState);
+ HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == '>') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ m_token->appendToAttributeValue(cc);
+ HTML_ADVANCE_TO(AttributeValueUnquotedState);
}
- if (character == '"' || character == '\'' || character == '<' || character == '=' || character == '`')
- parseError();
- m_token.appendToAttributeValue(character);
- ADVANCE_TO(AttributeValueUnquotedState);
+ }
END_STATE()
- BEGIN_STATE(CharacterReferenceInAttributeValueState)
+ HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
bool notEnoughCharacters = false;
StringBuilder decodedEntity;
bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
if (notEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ return haveBufferedCharacterToken();
if (!success) {
ASSERT(decodedEntity.isEmpty());
- m_token.appendToAttributeValue('&');
+ m_token->appendToAttributeValue('&');
} else {
for (unsigned i = 0; i < decodedEntity.length(); ++i)
- m_token.appendToAttributeValue(decodedEntity[i]);
+ m_token->appendToAttributeValue(decodedEntity[i]);
}
// We're supposed to switch back to the attribute value state that
// we were in when we were switched into this state. Rather than
// keeping track of this explictly, we observe that the previous
// state can be determined by m_additionalAllowedCharacter.
if (m_additionalAllowedCharacter == '"')
- SWITCH_TO(AttributeValueDoubleQuotedState);
- if (m_additionalAllowedCharacter == '\'')
- SWITCH_TO(AttributeValueSingleQuotedState);
- ASSERT(m_additionalAllowedCharacter == '>');
- SWITCH_TO(AttributeValueUnquotedState);
- END_STATE()
-
- BEGIN_STATE(AfterAttributeValueQuotedState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeAttributeNameState);
- if (character == '/')
- ADVANCE_TO(SelfClosingStartTagState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (m_options.usePreHTML5ParserQuirks && character == '<')
- return emitAndReconsumeInDataState();
- if (character == kEndOfFileMarker) {
- parseError();
- RECONSUME_IN(DataState);
- }
- parseError();
- RECONSUME_IN(BeforeAttributeNameState);
+ HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
+ else if (m_additionalAllowedCharacter == '\'')
+ HTML_SWITCH_TO(AttributeValueSingleQuotedState);
+ else if (m_additionalAllowedCharacter == '>')
+ HTML_SWITCH_TO(AttributeValueUnquotedState);
+ else
+ ASSERT_NOT_REACHED();
+ }
END_STATE()
- BEGIN_STATE(SelfClosingStartTagState)
- if (character == '>') {
- m_token.setSelfClosing();
- return emitAndResumeInDataState(source);
+ HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (m_options.usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ HTML_RECONSUME_IN(BeforeAttributeNameState);
}
- if (character == kEndOfFileMarker) {
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(SelfClosingStartTagState) {
+ if (cc == '>') {
+ m_token->setSelfClosing();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
parseError();
- RECONSUME_IN(DataState);
+ HTML_RECONSUME_IN(BeforeAttributeNameState);
}
- parseError();
- RECONSUME_IN(BeforeAttributeNameState);
+ }
END_STATE()
- BEGIN_STATE(BogusCommentState)
- m_token.beginComment();
- RECONSUME_IN(ContinueBogusCommentState);
+ HTML_BEGIN_STATE(BogusCommentState) {
+ m_token->beginComment();
+ HTML_RECONSUME_IN(ContinueBogusCommentState);
+ }
END_STATE()
- BEGIN_STATE(ContinueBogusCommentState)
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker)
- return emitAndReconsumeInDataState();
- m_token.appendToComment(character);
- ADVANCE_TO(ContinueBogusCommentState);
+ HTML_BEGIN_STATE(ContinueBogusCommentState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker)
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else {
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(ContinueBogusCommentState);
+ }
+ }
END_STATE()
- BEGIN_STATE(MarkupDeclarationOpenState)
- if (character == '-') {
- auto result = source.advancePast("--");
+ HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
+ DEFINE_STATIC_LOCAL(String, dashDashString, (ASCIILiteral("--")));
+ DEFINE_STATIC_LOCAL(String, doctypeString, (ASCIILiteral("doctype")));
+ DEFINE_STATIC_LOCAL(String, cdataString, (ASCIILiteral("[CDATA[")));
+ if (cc == '-') {
+ SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
if (result == SegmentedString::DidMatch) {
- m_token.beginComment();
- SWITCH_TO(CommentStartState);
- }
- if (result == SegmentedString::NotEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
- } else if (isASCIIAlphaCaselessEqual(character, 'd')) {
- auto result = source.advancePastIgnoringCase("doctype");
- if (result == SegmentedString::DidMatch)
- SWITCH_TO(DOCTYPEState);
- if (result == SegmentedString::NotEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
- } else if (character == '[' && shouldAllowCDATA()) {
- auto result = source.advancePast("[CDATA[");
- if (result == SegmentedString::DidMatch)
- SWITCH_TO(CDATASectionState);
- if (result == SegmentedString::NotEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ source.advanceAndASSERT('-');
+ source.advanceAndASSERT('-');
+ m_token->beginComment();
+ HTML_SWITCH_TO(CommentStartState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'D' || cc == 'd') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "doctype");
+ HTML_SWITCH_TO(DOCTYPEState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == '[' && shouldAllowCDATA()) {
+ SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERT(source, "[CDATA[");
+ HTML_SWITCH_TO(CDATASectionState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
}
parseError();
- RECONSUME_IN(BogusCommentState);
+ HTML_RECONSUME_IN(BogusCommentState);
+ }
END_STATE()
- BEGIN_STATE(CommentStartState)
- if (character == '-')
- ADVANCE_TO(CommentStartDashState);
- if (character == '>') {
+ HTML_BEGIN_STATE(CommentStartState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentStartDashState);
+ else if (cc == '>') {
parseError();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeInDataState();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
+ }
END_STATE()
- BEGIN_STATE(CommentStartDashState)
- if (character == '-')
- ADVANCE_TO(CommentEndState);
- if (character == '>') {
+ HTML_BEGIN_STATE(CommentStartDashState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndState);
+ else if (cc == '>') {
parseError();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeInDataState();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- m_token.appendToComment('-');
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
+ }
END_STATE()
- BEGIN_STATE(CommentState)
- if (character == '-')
- ADVANCE_TO(CommentEndDashState);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(CommentState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndDashState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeInDataState();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
+ }
END_STATE()
- BEGIN_STATE(CommentEndDashState)
- if (character == '-')
- ADVANCE_TO(CommentEndState);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(CommentEndDashState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeInDataState();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- m_token.appendToComment('-');
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
+ }
END_STATE()
- BEGIN_STATE(CommentEndState)
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == '!') {
+ HTML_BEGIN_STATE(CommentEndState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '!') {
parseError();
- ADVANCE_TO(CommentEndBangState);
- }
- if (character == '-') {
+ HTML_ADVANCE_TO(CommentEndBangState);
+ } else if (cc == '-') {
parseError();
- m_token.appendToComment('-');
- ADVANCE_TO(CommentEndState);
- }
- if (character == kEndOfFileMarker) {
+ m_token->appendToComment('-');
+ HTML_ADVANCE_TO(CommentEndState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- return emitAndReconsumeInDataState();
- }
- parseError();
- m_token.appendToComment('-');
- m_token.appendToComment('-');
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
- END_STATE()
-
- BEGIN_STATE(CommentEndBangState)
- if (character == '-') {
- m_token.appendToComment('-');
- m_token.appendToComment('-');
- m_token.appendToComment('!');
- ADVANCE_TO(CommentEndDashState);
- }
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker) {
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
parseError();
- return emitAndReconsumeInDataState();
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- m_token.appendToComment('-');
- m_token.appendToComment('-');
- m_token.appendToComment('!');
- m_token.appendToComment(character);
- ADVANCE_TO(CommentState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPEState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPENameState);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(CommentEndBangState) {
+ if (cc == '-') {
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment('!');
+ HTML_ADVANCE_TO(CommentEndDashState);
+ } else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- m_token.beginDOCTYPE();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment('!');
+ m_token->appendToComment(cc);
+ HTML_ADVANCE_TO(CommentState);
}
- parseError();
- RECONSUME_IN(BeforeDOCTYPENameState);
+ }
END_STATE()
- BEGIN_STATE(BeforeDOCTYPENameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPENameState);
- if (character == '>') {
+ HTML_BEGIN_STATE(DOCTYPEState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- m_token.beginDOCTYPE();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
parseError();
- m_token.beginDOCTYPE();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ HTML_RECONSUME_IN(BeforeDOCTYPENameState);
}
- m_token.beginDOCTYPE(toASCIILower(character));
- ADVANCE_TO(DOCTYPENameState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPENameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(AfterDOCTYPENameState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (isASCIIUpper(cc)) {
+ m_token->beginDOCTYPE(toLowerCase(cc));
+ HTML_ADVANCE_TO(DOCTYPENameState);
+ } else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->beginDOCTYPE(cc);
+ HTML_ADVANCE_TO(DOCTYPENameState);
}
- m_token.appendToName(toASCIILower(character));
- ADVANCE_TO(DOCTYPENameState);
+ }
END_STATE()
- BEGIN_STATE(AfterDOCTYPENameState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(AfterDOCTYPENameState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(DOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPENameState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->appendToName(toLowerCase(cc));
+ HTML_ADVANCE_TO(DOCTYPENameState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
- }
- if (isASCIIAlphaCaselessEqual(character, 'p')) {
- auto result = source.advancePastIgnoringCase("public");
- if (result == SegmentedString::DidMatch)
- SWITCH_TO(AfterDOCTYPEPublicKeywordState);
- if (result == SegmentedString::NotEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
- } else if (isASCIIAlphaCaselessEqual(character, 's')) {
- auto result = source.advancePastIgnoringCase("system");
- if (result == SegmentedString::DidMatch)
- SWITCH_TO(AfterDOCTYPESystemKeywordState);
- if (result == SegmentedString::NotEnoughCharacters)
- RETURN_IN_CURRENT_STATE(haveBufferedCharacterToken());
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToName(cc);
+ HTML_ADVANCE_TO(DOCTYPENameState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(AfterDOCTYPEPublicKeywordState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
- if (character == '"') {
+ HTML_BEGIN_STATE(AfterDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPENameState);
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setPublicIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
- }
- if (character == '\'') {
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral("public")));
+ DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral("system")));
+ if (cc == 'P' || cc == 'p') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "public");
+ HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'S' || cc == 's') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "system");
+ HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ }
parseError();
- m_token.setPublicIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- if (character == '>') {
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setPublicIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ m_token->setPublicIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
- if (character == '"') {
- m_token.setPublicIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
- }
- if (character == '\'') {
- m_token.setPublicIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
- }
- if (character == '>') {
+ HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
+ m_token->setPublicIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setPublicIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState)
- if (character == '"')
- ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
- if (character == '>') {
+ HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToPublicIdentifier(cc);
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
}
- m_token.appendToPublicIdentifier(character);
- ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState)
- if (character == '\'')
- ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
- if (character == '>') {
+ HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToPublicIdentifier(cc);
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
}
- m_token.appendToPublicIdentifier(character);
- ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(AfterDOCTYPEPublicIdentifierState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == '"') {
+ HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '"') {
parseError();
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- }
- if (character == '\'') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
parseError();
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
- }
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
- END_STATE()
-
- BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == '"') {
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- }
- if (character == '\'') {
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(AfterDOCTYPESystemKeywordState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
- if (character == '"') {
+ HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '"') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
parseError();
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- if (character == '\'') {
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ else if (cc == '"') {
parseError();
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- }
- if (character == '>') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(BeforeDOCTYPESystemIdentifierState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
- if (character == '"') {
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
- }
- if (character == '\'') {
- m_token.setSystemIdentifierToEmptyString();
- ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
- }
- if (character == '>') {
+ HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ if (cc == '"') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setSystemIdentifierToEmptyString();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- m_token.setForceQuirks();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState)
- if (character == '"')
- ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- if (character == '>') {
+ HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToSystemIdentifier(cc);
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
}
- m_token.appendToSystemIdentifier(character);
- ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState)
- if (character == '\'')
- ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- if (character == '>') {
+ HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
parseError();
- m_token.setForceQuirks();
- return emitAndResumeInDataState(source);
- }
- if (character == kEndOfFileMarker) {
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->appendToSystemIdentifier(cc);
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
}
- m_token.appendToSystemIdentifier(character);
- ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
END_STATE()
- BEGIN_STATE(AfterDOCTYPESystemIdentifierState)
- if (isTokenizerWhitespace(character))
- ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker) {
+ HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
parseError();
- m_token.setForceQuirks();
- return emitAndReconsumeInDataState();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
}
- parseError();
- ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(BogusDOCTYPEState)
- if (character == '>')
- return emitAndResumeInDataState(source);
- if (character == kEndOfFileMarker)
- return emitAndReconsumeInDataState();
- ADVANCE_TO(BogusDOCTYPEState);
+ HTML_BEGIN_STATE(BogusDOCTYPEState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker)
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
END_STATE()
- BEGIN_STATE(CDATASectionState)
- if (character == ']')
- ADVANCE_TO(CDATASectionRightSquareBracketState);
- if (character == kEndOfFileMarker)
- RECONSUME_IN(DataState);
- bufferCharacter(character);
- ADVANCE_TO(CDATASectionState);
+ HTML_BEGIN_STATE(CDATASectionState) {
+ if (cc == ']')
+ HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
+ else if (cc == kEndOfFileMarker)
+ HTML_RECONSUME_IN(DataState);
+ else {
+ bufferCharacter(cc);
+ HTML_ADVANCE_TO(CDATASectionState);
+ }
+ }
END_STATE()
- BEGIN_STATE(CDATASectionRightSquareBracketState)
- if (character == ']')
- ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
- bufferASCIICharacter(']');
- RECONSUME_IN(CDATASectionState);
- END_STATE()
+ HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
+ if (cc == ']')
+ HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+ else {
+ bufferCharacter(']');
+ HTML_RECONSUME_IN(CDATASectionState);
+ }
+ }
- BEGIN_STATE(CDATASectionDoubleRightSquareBracketState)
- if (character == '>')
- ADVANCE_TO(DataState);
- bufferASCIICharacter(']');
- bufferASCIICharacter(']');
- RECONSUME_IN(CDATASectionState);
+ HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+ if (cc == '>')
+ HTML_ADVANCE_TO(DataState);
+ else {
+ bufferCharacter(']');
+ bufferCharacter(']');
+ HTML_RECONSUME_IN(CDATASectionState);
+ }
+ }
END_STATE()
}
@@ -1409,45 +1579,39 @@ String HTMLTokenizer::bufferedCharacters() const
void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
{
if (tagName == textareaTag || tagName == titleTag)
- m_state = RCDATAState;
+ setState(HTMLTokenizer::RCDATAState);
else if (tagName == plaintextTag)
- m_state = PLAINTEXTState;
+ setState(HTMLTokenizer::PLAINTEXTState);
else if (tagName == scriptTag)
- m_state = ScriptDataState;
+ setState(HTMLTokenizer::ScriptDataState);
else if (tagName == styleTag
|| tagName == iframeTag
|| tagName == xmpTag
|| (tagName == noembedTag && m_options.pluginsEnabled)
|| tagName == noframesTag
|| (tagName == noscriptTag && m_options.scriptEnabled))
- m_state = RAWTEXTState;
-}
-
-inline void HTMLTokenizer::appendToTemporaryBuffer(UChar character)
-{
- ASSERT(isASCII(character));
- m_temporaryBuffer.append(character);
+ setState(HTMLTokenizer::RAWTEXTState);
}
-inline bool HTMLTokenizer::temporaryBufferIs(const char* expectedString)
+inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
{
return vectorEqualsString(m_temporaryBuffer, expectedString);
}
-inline void HTMLTokenizer::appendToPossibleEndTag(UChar character)
+inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
{
- ASSERT(isASCII(character));
- m_bufferedEndTagName.append(character);
+ ASSERT(isEndTagBufferingState(m_state));
+ m_bufferedEndTagName.append(cc);
}
-inline bool HTMLTokenizer::isAppropriateEndTag() const
+inline bool HTMLTokenizer::isAppropriateEndTag()
{
if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
return false;
- unsigned size = m_bufferedEndTagName.size();
+ size_t numCharacters = m_bufferedEndTagName.size();
- for (unsigned i = 0; i < size; i++) {
+ for (size_t i = 0; i < numCharacters; i++) {
if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
return false;
}
@@ -1457,6 +1621,7 @@ inline bool HTMLTokenizer::isAppropriateEndTag() const
inline void HTMLTokenizer::parseError()
{
+ notImplemented();
}
}