/*
 * Copyright (C) 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFABytecodeCompiler.h"

#if ENABLE(CONTENT_EXTENSIONS)

#include "ContentExtensionRule.h"
#include "DFA.h"
#include "DFANode.h"

namespace WebCore {

namespace ContentExtensions {

template <typename IntType>
inline void append(Vector<DFABytecode>& bytecode, IntType value)
{
    bytecode.resize(bytecode.size() + sizeof(IntType));
    *reinterpret_cast<IntType*>(&bytecode[bytecode.size() - sizeof(IntType)]) = value;
}

inline void appendZeroes(Vector<DFABytecode>& bytecode, DFABytecodeJumpSize jumpSize)
{
    switch (jumpSize) {
    case DFABytecodeJumpSize::Int8:
        append<int8_t>(bytecode, 0); // This value will be set when linking.
        break;
    case DFABytecodeJumpSize::Int16:
        append<int16_t>(bytecode, 0); // This value will be set when linking.
        break;
    case DFABytecodeJumpSize::Int24:
        append<uint16_t>(bytecode, 0);
        append<int8_t>(bytecode, 0); // These values will be set when linking.
        break;
    case DFABytecodeJumpSize::Int32:
        append<int32_t>(bytecode, 0); // This value will be set when linking.
        break;
    }
}

template <typename IntType>
inline void setBits(Vector<DFABytecode>& bytecode, uint32_t index, IntType value)
{
    RELEASE_ASSERT(index + sizeof(IntType) <= bytecode.size());
    ASSERT_WITH_MESSAGE(!*reinterpret_cast<IntType*>(&bytecode[index]), "Right now we should only be using setBits to overwrite values that were zero as a placeholder.");
    *reinterpret_cast<IntType*>(&bytecode[index]) = value;
}

static unsigned appendActionBytecodeSize(uint64_t action)
{
    if (action & ActionFlagMask)
        return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t);
    return sizeof(DFABytecodeInstruction) + sizeof(uint32_t);
}

void DFABytecodeCompiler::emitAppendAction(uint64_t action)
{
    // High bits are used to store flags. See compileRuleList.
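    // As a rough sketch (read off the append calls below, not a separate spec), the emitted layout is:
    //   with flags:    TestFlagsAndAppendAction[WithIfDomain], uint16_t flags (action >> 32), uint32_t action
    //   without flags: AppendAction[WithIfDomain], uint32_t action (the low 32 bits).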
    if (action & ActionFlagMask) {
        if (action & IfDomainFlag)
            append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendActionWithIfDomain);
        else
            append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction);
        append<uint16_t>(m_bytecode, static_cast<uint16_t>(action >> 32));
        append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
    } else {
        if (action & IfDomainFlag)
            append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendActionWithIfDomain);
        else
            append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction);
        append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
    }
}

int32_t DFABytecodeCompiler::longestPossibleJump(uint32_t instructionLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
{
    if (m_nodeStartOffsets[destinationNodeIndex] == std::numeric_limits<uint32_t>::max()) {
        // Jumping to a node that hasn't been compiled yet, so we don't know exactly how far forward we will need to jump.
        // Make sure we have enough room for the worst case: the farthest possible jump, which is the distance
        // if there were no compacted branches between this jump and its destination.
        ASSERT(instructionLocation >= m_nodeStartOffsets[sourceNodeIndex]);
        ASSERT(m_maxNodeStartOffsets[destinationNodeIndex] > m_maxNodeStartOffsets[sourceNodeIndex]);
        ASSERT(m_nodeStartOffsets[sourceNodeIndex] != std::numeric_limits<uint32_t>::max());
        return m_maxNodeStartOffsets[destinationNodeIndex] - m_maxNodeStartOffsets[sourceNodeIndex] - (m_nodeStartOffsets[sourceNodeIndex] - instructionLocation);
    }

    // Jumping to an already compiled node, so we know exactly where we will need to jump to.
    ASSERT(m_nodeStartOffsets[destinationNodeIndex] <= instructionLocation);
    return m_nodeStartOffsets[destinationNodeIndex] - instructionLocation;
}

void DFABytecodeCompiler::emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
{
    uint32_t instructionLocation = m_bytecode.size();
    uint32_t jumpLocation = instructionLocation + sizeof(uint8_t);
    int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
    DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
    append<uint8_t>(m_bytecode, static_cast<uint8_t>(DFABytecodeInstruction::Jump) | jumpSize);

    m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
    appendZeroes(m_bytecode, jumpSize);
}
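// For reference, a rough sketch of the check instructions emitted below (read off the append calls):
//   CheckValue[CaseSensitive|CaseInsensitive]:      opcode|jumpSize, uint8_t value, jump distance
//   CheckValueRange[CaseSensitive|CaseInsensitive]: opcode|jumpSize, uint8_t low, uint8_t high, jump distance
// The jump distance starts as a zero placeholder and is patched when linking; its size is chosen from the
// longest distance the jump could possibly need, since the destination node may not be compiled yet.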
void DFABytecodeCompiler::emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
{
    uint32_t instructionLocation = m_bytecode.size();
    uint32_t jumpLocation = instructionLocation + 2 * sizeof(uint8_t);
    int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
    DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
    DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive;
    append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
    append<uint8_t>(m_bytecode, value);
    m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
    appendZeroes(m_bytecode, jumpSize);
}

void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
{
    ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantics impose that lowValue is strictly less than highValue.");

    uint32_t instructionLocation = m_bytecode.size();
    uint32_t jumpLocation = instructionLocation + 3 * sizeof(uint8_t);
    int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
    DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
    DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive;
    append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
    append<uint8_t>(m_bytecode, lowValue);
    append<uint8_t>(m_bytecode, highValue);
    m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
    appendZeroes(m_bytecode, jumpSize);
}

void DFABytecodeCompiler::emitTerminate()
{
    append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate);
}
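// In outline, a compiled node is its appended actions followed by its transitions (compileNode and
// compileNodeTransitions below); m_nodeStartOffsets records where jumps into each node should land.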
void DFABytecodeCompiler::compileNode(uint32_t index, bool root)
{
    unsigned startSize = m_bytecode.size();

    const DFANode& node = m_dfa.nodes[index];
    if (node.isKilled()) {
        ASSERT(m_nodeStartOffsets[index] == std::numeric_limits<uint32_t>::max());
        return;
    }

    // Record starting index for linking.
    if (!root)
        m_nodeStartOffsets[index] = m_bytecode.size();

    for (uint64_t action : node.actions(m_dfa))
        emitAppendAction(action);

    // If we jump to the root, we don't want to re-add its actions to a HashSet.
    // We know we have already added them because the root is always compiled first and we always start interpreting at the beginning.
    if (root)
        m_nodeStartOffsets[index] = m_bytecode.size();

    compileNodeTransitions(index);

    ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= compiledNodeMaxBytecodeSize(index));
}

unsigned DFABytecodeCompiler::compiledNodeMaxBytecodeSize(uint32_t index)
{
    const DFANode& node = m_dfa.nodes[index];
    if (node.isKilled())
        return 0;
    unsigned size = 0;
    for (uint64_t action : node.actions(m_dfa))
        size += appendActionBytecodeSize(action);
    size += nodeTransitionsMaxBytecodeSize(node);
    return size;
}

DFABytecodeCompiler::JumpTable DFABytecodeCompiler::extractJumpTable(Vector<Range>& ranges, unsigned firstRange, unsigned lastRange)
{
    ASSERT(lastRange > firstRange);
    ASSERT(lastRange < ranges.size());

    JumpTable jumpTable;
    jumpTable.min = ranges[firstRange].min;
    jumpTable.max = ranges[lastRange].max;
    jumpTable.caseSensitive = ranges[lastRange].caseSensitive;

    unsigned size = lastRange - firstRange + 1;
    jumpTable.destinations.reserveInitialCapacity(size);
    for (unsigned i = firstRange; i <= lastRange; ++i) {
        const Range& range = ranges[i];

        ASSERT(range.caseSensitive == jumpTable.caseSensitive);
        ASSERT(range.min == range.max);
        ASSERT(range.min >= jumpTable.min);
        ASSERT(range.min <= jumpTable.max);

        jumpTable.destinations.append(range.destination);
    }

    ranges.remove(firstRange, size);

    return jumpTable;
}
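// In short, transitions() condenses a node's outgoing edges over the 128 ASCII values into a small set of
// ranges, marks a range of upper-case letters case-insensitive when the corresponding lower-case letters
// share the same destinations, and groups runs of adjacent single-character ranges into jump tables.
// Edges that go to the fallback target are skipped here because a single fallback jump covers them.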
DFABytecodeCompiler::Transitions DFABytecodeCompiler::transitions(const DFANode& node)
{
    Transitions transitions;

    uint32_t destinations[128];
    memset(destinations, 0xff, sizeof(destinations));
    const uint32_t noDestination = std::numeric_limits<uint32_t>::max();

    transitions.useFallbackTransition = node.canUseFallbackTransition(m_dfa);
    if (transitions.useFallbackTransition)
        transitions.fallbackTransitionTarget = node.bestFallbackTarget(m_dfa);

    for (const auto& transition : node.transitions(m_dfa)) {
        uint32_t targetNodeIndex = transition.target();
        if (transitions.useFallbackTransition && transitions.fallbackTransitionTarget == targetNodeIndex)
            continue;

        for (uint16_t i = transition.range().first; i <= transition.range().last; ++i)
            destinations[i] = targetNodeIndex;
    }

    Vector<Range>& ranges = transitions.ranges;
    uint8_t rangeMin;
    bool hasRangeMin = false;
    for (uint8_t i = 0; i < 128; i++) {
        if (hasRangeMin) {
            if (destinations[i] != destinations[rangeMin]) {
                // This is the end of a range. Check if it can be case insensitive.
                uint8_t rangeMax = i - 1;
                bool caseSensitive = true;
                if (rangeMin >= 'A' && rangeMax <= 'Z') {
                    caseSensitive = false;
                    for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
                        if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
                            caseSensitive = true;
                            break;
                        }
                    }
                }

                if (!caseSensitive) {
                    // If all the lower-case destinations are the same as the upper-case destinations,
                    // then they will be covered by a case-insensitive range and will not need their own range.
                    for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
                        ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
                        destinations[toASCIILower(rangeIndex)] = noDestination;
                    }
                    ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
                } else
                    ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));

                if (destinations[i] == noDestination)
                    hasRangeMin = false;
                else
                    rangeMin = i;
            }
        } else {
            if (destinations[i] != noDestination) {
                rangeMin = i;
                hasRangeMin = true;
            }
        }
    }
    if (hasRangeMin) {
        // Ranges are appended after passing the end of them.
        // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
        // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
        ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
    }

    Vector<JumpTable>& jumpTables = transitions.jumpTables;
    unsigned rangePosition = 0;
    unsigned baseRangePosition = std::numeric_limits<unsigned>::max();
    Range* baseRange = nullptr;
    while (rangePosition < ranges.size()) {
        auto& range = ranges[rangePosition];
        if (baseRange) {
            if (range.min != range.max || baseRange->caseSensitive != range.caseSensitive || ranges[rangePosition - 1].max + 1 != range.min) {
                if (rangePosition - baseRangePosition > 1) {
                    jumpTables.append(extractJumpTable(ranges, baseRangePosition, rangePosition - 1));
                    rangePosition = baseRangePosition;
                }
                baseRangePosition = std::numeric_limits<unsigned>::max();
                baseRange = nullptr;
            }
        } else {
            if (range.min == range.max) {
                baseRangePosition = rangePosition;
                baseRange = &range;
            }
        }
        ++rangePosition;
    }
    if (baseRange && ranges.size() - baseRangePosition > 1)
        jumpTables.append(extractJumpTable(ranges, baseRangePosition, ranges.size() - 1));

    return transitions;
}

unsigned DFABytecodeCompiler::checkForJumpTableMaxBytecodeSize(const JumpTable& jumpTable)
{
    unsigned baselineSize = sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t);
    unsigned targetsSize = (jumpTable.max - jumpTable.min + 1) * sizeof(uint32_t);
    return baselineSize + targetsSize;
}

unsigned DFABytecodeCompiler::checkForRangeMaxBytecodeSize(const Range& range)
{
    if (range.min == range.max)
        return sizeof(DFABytecodeInstruction::CheckValueCaseInsensitive) + sizeof(uint8_t) + sizeof(uint32_t);
    return sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t) + sizeof(uint32_t);
}
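// Rough layout of the jump table instruction emitted below: opcode|jumpSize, uint8_t min, uint8_t max,
// then (max - min + 1) jump distances, one per value in [min, max], all encoded with the widest jump size
// any destination needs. The distances are zero placeholders here and are patched during linking.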
void DFABytecodeCompiler::compileJumpTable(uint32_t nodeIndex, const JumpTable& jumpTable)
{
    unsigned startSize = m_bytecode.size();
    ASSERT_WITH_MESSAGE(jumpTable.max < 128, "The DFA engine only supports the ASCII alphabet.");
    ASSERT(jumpTable.min <= jumpTable.max);

    uint32_t instructionLocation = m_bytecode.size();
    DFABytecodeJumpSize jumpSize = Int8;
    for (uint32_t destinationNodeIndex : jumpTable.destinations) {
        int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
        DFABytecodeJumpSize localJumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
        jumpSize = std::max(jumpSize, localJumpSize);
    }

    DFABytecodeInstruction instruction = jumpTable.caseSensitive ? DFABytecodeInstruction::JumpTableCaseSensitive : DFABytecodeInstruction::JumpTableCaseInsensitive;
    append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
    append<uint8_t>(m_bytecode, jumpTable.min);
    append<uint8_t>(m_bytecode, jumpTable.max);

    for (uint32_t destinationNodeIndex : jumpTable.destinations) {
        int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
        uint32_t jumpLocation = m_bytecode.size();
        m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
        appendZeroes(m_bytecode, jumpSize);
    }

    ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForJumpTableMaxBytecodeSize(jumpTable));
}

void DFABytecodeCompiler::compileCheckForRange(uint32_t nodeIndex, const Range& range)
{
    unsigned startSize = m_bytecode.size();
    ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
    ASSERT(range.min <= range.max);

    if (range.min == range.max)
        emitCheckValue(range.min, nodeIndex, range.destination, range.caseSensitive);
    else
        emitCheckValueRange(range.min, range.max, nodeIndex, range.destination, range.caseSensitive);

    ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForRangeMaxBytecodeSize(range));
}

unsigned DFABytecodeCompiler::nodeTransitionsMaxBytecodeSize(const DFANode& node)
{
    unsigned size = 0;
    Transitions nodeTransitions = transitions(node);
    for (const auto& jumpTable : nodeTransitions.jumpTables)
        size += checkForJumpTableMaxBytecodeSize(jumpTable);
    for (const auto& range : nodeTransitions.ranges)
        size += checkForRangeMaxBytecodeSize(range);
    if (nodeTransitions.useFallbackTransition)
        size += sizeof(DFABytecodeInstruction::Jump) + sizeof(uint32_t);
    else
        size += instructionSizeWithArguments(DFABytecodeInstruction::Terminate);
    return size;
}

void DFABytecodeCompiler::compileNodeTransitions(uint32_t nodeIndex)
{
    const DFANode& node = m_dfa.nodes[nodeIndex];
    unsigned startSize = m_bytecode.size();

    Transitions nodeTransitions = transitions(node);
    for (const auto& jumpTable : nodeTransitions.jumpTables)
        compileJumpTable(nodeIndex, jumpTable);
    for (const auto& range : nodeTransitions.ranges)
        compileCheckForRange(nodeIndex, range);
    if (nodeTransitions.useFallbackTransition)
        emitJump(nodeIndex, nodeTransitions.fallbackTransitionTarget);
    else
        emitTerminate();

    ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= nodeTransitionsMaxBytecodeSize(node));
}
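// compile() below proceeds, roughly, in three passes: compute worst-case start offsets for every node
// assuming no branch compaction, then compile the root followed by every other node while recording a
// LinkRecord for each jump whose distance is not yet known, and finally link by patching each placeholder
// with the real signed distance and writing the DFA's total size into its header.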
void DFABytecodeCompiler::compile()
{
    uint32_t startLocation = m_bytecode.size();
    append<DFAHeader>(m_bytecode, 0); // This will be set when we are finished compiling this DFA.

    m_nodeStartOffsets.resize(m_dfa.nodes.size());
    for (unsigned i = 0; i < m_dfa.nodes.size(); ++i)
        m_nodeStartOffsets[i] = std::numeric_limits<uint32_t>::max();

    // Populate m_maxNodeStartOffsets with a worst-case index of where each node would be with no branch compaction.
    // Compacting the branches using 1-4 byte signed jump distances should only make nodes closer together than this.
    ASSERT(m_maxNodeStartOffsets.isEmpty());
    m_maxNodeStartOffsets.clear();
    m_maxNodeStartOffsets.resize(m_dfa.nodes.size());
    unsigned rootActionsSize = 0;
    for (uint64_t action : m_dfa.nodes[m_dfa.root].actions(m_dfa))
        rootActionsSize += appendActionBytecodeSize(action);
    m_maxNodeStartOffsets[m_dfa.root] = sizeof(DFAHeader) + rootActionsSize;
    unsigned nextIndex = sizeof(DFAHeader) + compiledNodeMaxBytecodeSize(m_dfa.root);
    for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
        if (i != m_dfa.root) {
            m_maxNodeStartOffsets[i] = nextIndex;
            nextIndex += compiledNodeMaxBytecodeSize(i);
        }
    }

    // Make sure the root is always at the beginning of the bytecode.
    compileNode(m_dfa.root, true);
    for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
        if (i != m_dfa.root)
            compileNode(i, false);
    }

    ASSERT(m_maxNodeStartOffsets.size() == m_nodeStartOffsets.size());
    for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) {
        if (m_nodeStartOffsets[i] != std::numeric_limits<uint32_t>::max())
            ASSERT(m_maxNodeStartOffsets[i] >= m_nodeStartOffsets[i]);
    }

    // Link.
    for (const auto& linkRecord : m_linkRecords) {
        uint32_t destination = m_nodeStartOffsets[linkRecord.destinationNodeIndex];
        RELEASE_ASSERT(destination < std::numeric_limits<int32_t>::max());
        int32_t distance = destination - linkRecord.instructionLocation;
        ASSERT(abs(distance) <= abs(linkRecord.longestPossibleJump));

        switch (linkRecord.jumpSize) {
        case Int8:
            RELEASE_ASSERT(distance == static_cast<int8_t>(distance));
            setBits<int8_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int8_t>(distance));
            break;
        case Int16:
            RELEASE_ASSERT(distance == static_cast<int16_t>(distance));
            setBits<int16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int16_t>(distance));
            break;
        case Int24:
            RELEASE_ASSERT(distance >= Int24Min && distance <= Int24Max);
            setBits<uint16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<uint16_t>(distance));
            setBits<int8_t>(m_bytecode, linkRecord.jumpLocation + sizeof(int16_t), static_cast<int8_t>(distance >> 16));
            break;
        case Int32:
            setBits<int32_t>(m_bytecode, linkRecord.jumpLocation, distance);
            break;
        }
    }

    setBits<DFAHeader>(m_bytecode, startLocation, m_bytecode.size() - startLocation);
}

} // namespace ContentExtensions

} // namespace WebCore

#endif // ENABLE(CONTENT_EXTENSIONS)