diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-10 09:28:39 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-10 09:28:39 +0000 |
commit | 32761a6cee1d0dee366b885b7b9c777e67885688 (patch) | |
tree | d6bec92bebfb216f4126356e55518842c2f476a1 /Source/WebCore/html/parser/HTMLDocumentParser.cpp | |
parent | a4e969f4965059196ca948db781e52f7cfebf19e (diff) | |
download | WebKitGtk-tarball-32761a6cee1d0dee366b885b7b9c777e67885688.tar.gz |
webkitgtk-2.4.11webkitgtk-2.4.11
Diffstat (limited to 'Source/WebCore/html/parser/HTMLDocumentParser.cpp')
-rw-r--r-- | Source/WebCore/html/parser/HTMLDocumentParser.cpp | 226 |
1 files changed, 135 insertions, 91 deletions
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp index eddcc8d8f..488862049 100644 --- a/Source/WebCore/html/parser/HTMLDocumentParser.cpp +++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp @@ -1,6 +1,5 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. - * Copyright (C) 2015 Apple Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,59 +26,90 @@ #include "config.h" #include "HTMLDocumentParser.h" +#include "ContentSecurityPolicy.h" #include "DocumentFragment.h" +#include "DocumentLoader.h" #include "Frame.h" -#include "HTMLDocument.h" #include "HTMLParserScheduler.h" -#include "HTMLPreloadScanner.h" #include "HTMLScriptRunner.h" #include "HTMLTreeBuilder.h" +#include "HTMLDocument.h" +#include "InspectorInstrumentation.h" +#include "Settings.h" +#include <wtf/Ref.h> namespace WebCore { using namespace HTMLNames; +// This is a direct transcription of step 4 from: +// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case +static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options) +{ + if (!contextElement) + return HTMLTokenizer::DataState; + + const QualifiedName& contextTag = contextElement->tagQName(); + + if (contextTag.matches(titleTag) || contextTag.matches(textareaTag)) + return HTMLTokenizer::RCDATAState; + if (contextTag.matches(styleTag) + || contextTag.matches(xmpTag) + || contextTag.matches(iframeTag) + || (contextTag.matches(noembedTag) && options.pluginsEnabled) + || (contextTag.matches(noscriptTag) && options.scriptEnabled) + || contextTag.matches(noframesTag)) + return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState; + if (contextTag.matches(scriptTag)) + return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState; + if (contextTag.matches(plaintextTag)) + return HTMLTokenizer::PLAINTEXTState; + return HTMLTokenizer::DataState; +} + HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document) : ScriptableDocumentParser(document) , m_options(document) - , m_tokenizer(m_options) + , m_token(std::make_unique<HTMLToken>()) + , m_tokenizer(std::make_unique<HTMLTokenizer>(m_options)) , m_scriptRunner(std::make_unique<HTMLScriptRunner>(document, static_cast<HTMLScriptRunnerHost&>(*this))) , m_treeBuilder(std::make_unique<HTMLTreeBuilder>(*this, document, parserContentPolicy(), m_options)) , m_parserScheduler(std::make_unique<HTMLParserScheduler>(*this)) , m_xssAuditorDelegate(document) , m_preloader(std::make_unique<HTMLResourcePreloader>(document)) + , m_endWasDelayed(false) + , m_haveBackgroundParser(false) + , m_pumpSessionNestingLevel(0) { + ASSERT(m_token); + ASSERT(m_tokenizer); } -Ref<HTMLDocumentParser> HTMLDocumentParser::create(HTMLDocument& document) -{ - return adoptRef(*new HTMLDocumentParser(document)); -} - -inline HTMLDocumentParser::HTMLDocumentParser(DocumentFragment& fragment, Element& contextElement, ParserContentPolicy rawPolicy) - : ScriptableDocumentParser(fragment.document(), rawPolicy) +// FIXME: Member variables should be grouped into self-initializing structs to +// minimize code duplication between these constructors. +HTMLDocumentParser::HTMLDocumentParser(DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) + : ScriptableDocumentParser(fragment.document(), parserContentPolicy) , m_options(fragment.document()) - , m_tokenizer(m_options) - , m_treeBuilder(std::make_unique<HTMLTreeBuilder>(*this, fragment, contextElement, parserContentPolicy(), m_options)) + , m_token(std::make_unique<HTMLToken>()) + , m_tokenizer(std::make_unique<HTMLTokenizer>(m_options)) + , m_treeBuilder(std::make_unique<HTMLTreeBuilder>(*this, fragment, contextElement, this->parserContentPolicy(), m_options)) , m_xssAuditorDelegate(fragment.document()) + , m_endWasDelayed(false) + , m_haveBackgroundParser(false) + , m_pumpSessionNestingLevel(0) { - // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments - if (contextElement.isHTMLElement()) - m_tokenizer.updateStateFor(contextElement.tagQName().localName()); + bool reportErrors = false; // For now document fragment parsing never reports errors. + m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options)); m_xssAuditor.initForFragment(); } -inline Ref<HTMLDocumentParser> HTMLDocumentParser::create(DocumentFragment& fragment, Element& contextElement, ParserContentPolicy parserContentPolicy) -{ - return adoptRef(*new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); -} - HTMLDocumentParser::~HTMLDocumentParser() { ASSERT(!m_parserScheduler); ASSERT(!m_pumpSessionNestingLevel); ASSERT(!m_preloadScanner); ASSERT(!m_insertionPreloadScanner); + ASSERT(!m_haveBackgroundParser); } void HTMLDocumentParser::detach() @@ -88,6 +118,7 @@ void HTMLDocumentParser::detach() if (m_scriptRunner) m_scriptRunner->detach(); + m_treeBuilder->detach(); // FIXME: It seems wrong that we would have a preload scanner here. // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do. m_preloadScanner = nullptr; @@ -102,10 +133,12 @@ void HTMLDocumentParser::stopParsing() } // This kicks off "Once the user agent stops parsing" as described by: -// https://html.spec.whatwg.org/multipage/syntax.html#the-end +// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end void HTMLDocumentParser::prepareToStopParsing() { - ASSERT(!hasInsertionPoint()); + // FIXME: It may not be correct to disable this for the background parser. + // That means hasInsertionPoint() may not be correct in some cases. + ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. @@ -132,16 +165,6 @@ void HTMLDocumentParser::prepareToStopParsing() attemptToRunDeferredScriptsAndEnd(); } -inline bool HTMLDocumentParser::inPumpSession() const -{ - return m_pumpSessionNestingLevel > 0; -} - -inline bool HTMLDocumentParser::shouldDelayEnd() const -{ - return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); -} - bool HTMLDocumentParser::isParsingFragment() const { return m_treeBuilder->isParsingFragment(); @@ -149,7 +172,7 @@ bool HTMLDocumentParser::isParsingFragment() const bool HTMLDocumentParser::processingData() const { - return isScheduledForResume() || inPumpSession(); + return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser; } void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode) @@ -178,8 +201,8 @@ void HTMLDocumentParser::resumeParsingAfterYield() // but we need to ensure it isn't deleted yet. Ref<HTMLDocumentParser> protect(*this); - // We should never be here unless we can pump immediately. - // Call pumpTokenizer() directly so that ASSERTS will fire if we're wrong. + // We should never be here unless we can pump immediately. Call pumpTokenizer() + // directly so that ASSERTS will fire if we're wrong. pumpTokenizer(AllowYield); endIfDelayed(); } @@ -189,11 +212,10 @@ void HTMLDocumentParser::runScriptsForPausedTreeBuilder() ASSERT(scriptingContentIsAllowed(parserContentPolicy())); TextPosition scriptStartPosition = TextPosition::belowRangePosition(); - if (auto scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition)) { - // We will not have a scriptRunner when parsing a DocumentFragment. - if (m_scriptRunner) - m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); - } + RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition); + // We will not have a scriptRunner when parsing a DocumentFragment. + if (m_scriptRunner) + m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); } bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session) @@ -201,6 +223,8 @@ bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& ses if (isStopped()) return false; + ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); + if (isWaitingForScripts()) { if (mode == AllowYield) m_parserScheduler->checkForYieldBeforeScript(session); @@ -215,11 +239,14 @@ bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& ses return false; } - // FIXME: It's wrong for the HTMLDocumentParser to reach back to the Frame, but this approach is - // how the parser has always handled stopping when the page assigns window.location. What should - // happen instead is that assigning window.location causes the parser to stop parsing cleanly. - // The problem is we're not prepared to do that at every point where we run JavaScript. - if (!isParsingFragment() && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) + // FIXME: It's wrong for the HTMLDocumentParser to reach back to the + // Frame, but this approach is how the old parser handled + // stopping when the page assigns window.location. What really + // should happen is that assigning window.location causes the + // parser to stop parsing cleanly. The problem is we're not + // perpared to do that at every point where we run JavaScript. + if (!isParsingFragment() + && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) return false; if (mode == AllowYield) @@ -228,12 +255,17 @@ bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& ses return true; } +void HTMLDocumentParser::forcePlaintextForTextDocument() +{ + m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); +} + Document* HTMLDocumentParser::contextForParsingSession() { // The parsing session should interact with the document only when parsing // non-fragments. Otherwise, we might delay the load event mistakenly. if (isParsingFragment()) - return nullptr; + return 0; return document(); } @@ -241,32 +273,41 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) { ASSERT(!isStopped()); ASSERT(!isScheduledForResume()); - - // This is an attempt to check that this object is both attached to the Document and protected by something. + // ASSERT that this object is both attached to the Document and protected. ASSERT(refCount() >= 2); + ASSERT(m_tokenizer); + ASSERT(m_token); + ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession()); + // We tell the InspectorInstrumentation about every pump, even if we + // end up pumping nothing. It can filter out empty pumps itself. + // FIXME: m_input.current().length() is only accurate if we + // end up parsing the whole buffer in this pump. We should pass how + // much we parsed as part of didWriteHTML instead of willWriteHTML. + InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt()); + m_xssAuditor.init(document(), &m_xssAuditorDelegate); while (canTakeNextToken(mode, session) && !session.needsYield) { if (!isParsingFragment()) - m_sourceTracker.startToken(m_input.current(), m_tokenizer); + m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token()); - auto token = m_tokenizer.nextToken(m_input.current()); - if (!token) + if (!m_tokenizer->nextToken(m_input.current(), token())) break; if (!isParsingFragment()) { - m_sourceTracker.endToken(m_input.current(), m_tokenizer); + m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token()); // We do not XSS filter innerHTML, which means we (intentionally) fail // http/tests/security/xssAuditor/dom-write-innerHTML.html - if (auto xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(*token, m_sourceTracker, m_tokenizer.shouldAllowCDATA()))) + if (auto xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) m_xssAuditorDelegate.didBlockScript(*xssInfo); } - constructTreeFromHTMLToken(token); + constructTreeFromHTMLToken(token()); + ASSERT(token().isUninitialized()); } // Ensure we haven't been totally deref'ed after pumping. Any caller of this @@ -280,18 +321,20 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) m_parserScheduler->scheduleForResume(); if (isWaitingForScripts()) { - ASSERT(m_tokenizer.isInDataState()); + ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); if (!m_preloadScanner) { m_preloadScanner = std::make_unique<HTMLPreloadScanner>(m_options, document()->url(), document()->deviceScaleFactor()); m_preloadScanner->appendToEnd(m_input.current()); } - m_preloadScanner->scan(*m_preloader, *document()); + m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } + + InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt()); } -void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLTokenizer::TokenPtr& rawToken) +void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken) { - AtomicHTMLToken token(*rawToken); + AtomicHTMLToken token(rawToken); // We clear the rawToken in case constructTreeFromAtomicToken // synchronously re-enters the parser. We don't clear the token immedately @@ -303,21 +346,24 @@ void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLTokenizer::TokenPtr& raw // FIXME: Stop clearing the rawToken once we start running the parser off // the main thread or once we stop allowing synchronous JavaScript // execution from parseAttribute. - if (rawToken->type() != HTMLToken::Character) { - // Clearing the TokenPtr makes sure we don't clear the HTMLToken a second time - // later when the TokenPtr is destroyed. + if (rawToken.type() != HTMLToken::Character) rawToken.clear(); - } - m_treeBuilder->constructTree(token); + m_treeBuilder->constructTree(&token); + + if (!rawToken.isUninitialized()) { + ASSERT(rawToken.type() == HTMLToken::Character); + rawToken.clear(); + } } bool HTMLDocumentParser::hasInsertionPoint() { // FIXME: The wasCreatedByScript() branch here might not be fully correct. - // Our model of the EOF character differs slightly from the one in the spec - // because our treatment is uniform between network-sourced and script-sourced - // input streams whereas the spec treats them differently. + // Our model of the EOF character differs slightly from the one in + // the spec because our treatment is uniform between network-sourced + // and script-sourced input streams whereas the spec treats them + // differently. return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile()); } @@ -338,16 +384,17 @@ void HTMLDocumentParser::insert(const SegmentedString& source) if (isWaitingForScripts()) { // Check the document.write() output with a separate preload scanner as // the main scanner can't deal with insertions. - if (!m_insertionPreloadScanner) + if (!m_insertionPreloadScanner) { m_insertionPreloadScanner = std::make_unique<HTMLPreloadScanner>(m_options, document()->url(), document()->deviceScaleFactor()); + } m_insertionPreloadScanner->appendToEnd(source); - m_insertionPreloadScanner->scan(*m_preloader, *document()); + m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } endIfDelayed(); } -void HTMLDocumentParser::append(RefPtr<StringImpl>&& inputSource) +void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) { if (isStopped()) return; @@ -355,8 +402,7 @@ void HTMLDocumentParser::append(RefPtr<StringImpl>&& inputSource) // pumpTokenizer can cause this parser to be detached from the Document, // but we need to ensure it isn't deleted yet. Ref<HTMLDocumentParser> protect(*this); - - String source(WTFMove(inputSource)); + String source(inputSource); if (m_preloadScanner) { if (m_input.current().isEmpty() && !isWaitingForScripts()) { @@ -366,7 +412,7 @@ void HTMLDocumentParser::append(RefPtr<StringImpl>&& inputSource) } else { m_preloadScanner->appendToEnd(source); if (isWaitingForScripts()) - m_preloadScanner->scan(*m_preloader, *document()); + m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } } @@ -396,7 +442,9 @@ void HTMLDocumentParser::end() void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd() { ASSERT(isStopping()); - ASSERT(!hasInsertionPoint()); + // FIXME: It may not be correct to disable this for the background parser. + // That means hasInsertionPoint() may not be correct in some cases. + ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing()) return; end(); @@ -444,18 +492,18 @@ void HTMLDocumentParser::finish() bool HTMLDocumentParser::isExecutingScript() const { - return m_scriptRunner && m_scriptRunner->isExecutingScript(); + if (!m_scriptRunner) + return false; + return m_scriptRunner->isExecutingScript(); } TextPosition HTMLDocumentParser::textPosition() const { - auto& currentString = m_input.current(); - return TextPosition(currentString.currentLine(), currentString.currentColumn()); -} + const SegmentedString& currentString = m_input.current(); + OrdinalNumber line = currentString.currentLine(); + OrdinalNumber column = currentString.currentColumn(); -bool HTMLDocumentParser::shouldAssociateConsoleMessagesWithTextPosition() const -{ - return inPumpSession() && !isExecutingScript(); + return TextPosition(line, column); } bool HTMLDocumentParser::isWaitingForScripts() const @@ -479,10 +527,6 @@ void HTMLDocumentParser::resumeParsingAfterScriptExecution() ASSERT(!isExecutingScript()); ASSERT(!isWaitingForScripts()); - // pumpTokenizer can cause this parser to be detached from the Document, - // but we need to ensure it isn't deleted yet. - Ref<HTMLDocumentParser> protect(*this); - m_insertionPreloadScanner = nullptr; pumpTokenizerIfPossible(AllowYield); endIfDelayed(); @@ -501,12 +545,12 @@ void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript) { cachedScript->removeClient(this); } - + void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan() { ASSERT(m_preloadScanner); m_preloadScanner->appendToEnd(m_input.current()); - m_preloadScanner->scan(*m_preloader, *document()); + m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); } void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource) @@ -546,13 +590,13 @@ void HTMLDocumentParser::executeScriptsWaitingForStylesheets() resumeParsingAfterScriptExecution(); } -void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment& fragment, Element& contextElement, ParserContentPolicy parserContentPolicy) +void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) { - auto parser = create(fragment, contextElement, parserContentPolicy); + RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); parser->insert(source); // Use insert() so that the parser will not yield. parser->finish(); - ASSERT(!parser->processingData()); - parser->detach(); + ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> + parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. } void HTMLDocumentParser::suspendScheduledTasks() |