/*
 * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirIteratedRegisterCoalescing.h"

#if ENABLE(B3_JIT)

#include "AirCode.h"
#include "AirInsertionSet.h"
#include "AirInstInlines.h"
#include "AirLiveness.h"
#include "AirPhaseScope.h"
#include "AirRegisterPriority.h"
#include "AirTmpInlines.h"
#include "AirTmpWidth.h"
#include "AirUseCounts.h"
#include <wtf/ListDump.h>
#include <wtf/ListHashSet.h>

namespace JSC { namespace B3 { namespace Air {

namespace {

bool debug = false;
bool traceDebug = false;
bool reportStats = false;

// The AbstractColoringAllocator defines all the code that is independent
// of the register type and can be shared when allocating registers.
template<typename IndexType>
class AbstractColoringAllocator {
public:
    AbstractColoringAllocator(const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmp)
        : m_regsInPriorityOrder(regsInPriorityOrder)
        , m_lastPrecoloredRegisterIndex(lastPrecoloredRegisterIndex)
        , m_unspillableTmps(unspillableTmp)
    {
        initializeDegrees(tmpArraySize);

        m_adjacencyList.resize(tmpArraySize);
        m_moveList.resize(tmpArraySize);
        m_coalescedTmps.fill(0, tmpArraySize);
        m_isOnSelectStack.ensureSize(tmpArraySize);
    }

protected:
    IndexType getAlias(IndexType tmpIndex) const
    {
        IndexType alias = tmpIndex;
        while (IndexType nextAlias = m_coalescedTmps[alias])
            alias = nextAlias;
        return alias;
    }

    void addEdge(IndexType a, IndexType b)
    {
        if (a == b)
            return;
        addEdgeDistinct(a, b);
    }

    void makeWorkList()
    {
        IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1;
        for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) {
            unsigned degree = m_degrees[i];
            if (!degree)
                continue;

            if (degree >= m_regsInPriorityOrder.size())
                addToSpill(i);
            else if (!m_moveList[i].isEmpty())
                m_freezeWorklist.add(i);
            else
                m_simplifyWorklist.append(i);
        }
    }

    void addToSpill(unsigned toSpill)
    {
        if (m_unspillableTmps.contains(toSpill))
            return;

        m_spillWorklist.add(toSpill);
    }

    // A low-degree vertex can always be colored: just pick any color not taken by its
    // adjacent vertices.
    // The "Simplify" phase takes a low-degree vertex out of the interference graph to simplify it.
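    // Note: allocate() in the ColoringAllocator subclass drives the classic iterated-register-coalescing
    // loop over the work lists below: simplify low-degree nodes, coalesce moves, freeze move-related
    // nodes that cannot be coalesced, and finally pick potential spill candidates when nothing else applies.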
    void simplify()
    {
        IndexType lastIndex = m_simplifyWorklist.takeLast();

        ASSERT(!m_selectStack.contains(lastIndex));
        ASSERT(!m_isOnSelectStack.get(lastIndex));
        m_selectStack.append(lastIndex);
        m_isOnSelectStack.quickSet(lastIndex);

        forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) {
            decrementDegree(adjacentTmpIndex);
        });
    }

    void freeze()
    {
        IndexType victimIndex = m_freezeWorklist.takeAny();
        ASSERT_WITH_MESSAGE(getAlias(victimIndex) == victimIndex, "coalesce() should not leave aliased Tmp in the worklist.");
        m_simplifyWorklist.append(victimIndex);
        freezeMoves(victimIndex);
    }

    void freezeMoves(IndexType tmpIndex)
    {
        forEachNodeMoves(tmpIndex, [this, tmpIndex] (IndexType moveIndex) {
            if (!m_activeMoves.quickClear(moveIndex))
                m_worklistMoves.takeMove(moveIndex);

            const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex];
            IndexType srcTmpIndex = moveOperands.srcIndex;
            IndexType dstTmpIndex = moveOperands.dstIndex;

            IndexType originalOtherTmp = srcTmpIndex != tmpIndex ? srcTmpIndex : dstTmpIndex;
            IndexType otherTmpIndex = getAlias(originalOtherTmp);
            if (m_degrees[otherTmpIndex] < m_regsInPriorityOrder.size() && !isMoveRelated(otherTmpIndex)) {
                if (m_freezeWorklist.remove(otherTmpIndex))
                    m_simplifyWorklist.append(otherTmpIndex);
            }
        });
    }

    void coalesce()
    {
        unsigned moveIndex = m_worklistMoves.takeLastMove();
        const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex];
        IndexType u = getAlias(moveOperands.srcIndex);
        IndexType v = getAlias(moveOperands.dstIndex);

        if (isPrecolored(v))
            std::swap(u, v);

        if (traceDebug)
            dataLog("Coalescing move at index ", moveIndex, " u = ", u, " v = ", v, "\n");

        if (u == v) {
            addWorkList(u);

            if (traceDebug)
                dataLog("    Coalesced\n");
        } else if (isPrecolored(v)
            || m_interferenceEdges.contains(InterferenceEdge(u, v))
            || (u == m_framePointerIndex && m_interferesWithFramePointer.quickGet(v))) {
            addWorkList(u);
            addWorkList(v);

            if (traceDebug)
                dataLog("    Constrained\n");
        } else if (canBeSafelyCoalesced(u, v)) {
            combine(u, v);
            addWorkList(u);
            m_hasCoalescedNonTrivialMove = true;

            if (traceDebug)
                dataLog("    Safe Coalescing\n");
        } else {
            m_activeMoves.quickSet(moveIndex);

            if (traceDebug)
                dataLog("    Failed coalescing, added to active moves.\n");
        }
    }

    void assignColors()
    {
        ASSERT(m_simplifyWorklist.isEmpty());
        ASSERT(m_worklistMoves.isEmpty());
        ASSERT(m_freezeWorklist.isEmpty());
        ASSERT(m_spillWorklist.isEmpty());

        // Reclaim as much memory as possible.
        m_interferenceEdges.clear();
        m_degrees.clear();
        m_moveList.clear();
        m_worklistMoves.clear();
        m_simplifyWorklist.clear();
        m_spillWorklist.clear();
        m_freezeWorklist.clear();

        // Try to color the Tmps on the stack.
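        // Tmps are popped in the reverse order of their simplification. Each one gets the first
        // register in priority order that is not already used by one of its colored neighbors;
        // if every register is taken, the Tmp is recorded as a spill candidate instead.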
        m_coloredTmp.resize(m_adjacencyList.size());
        while (!m_selectStack.isEmpty()) {
            unsigned tmpIndex = m_selectStack.takeLast();
            ASSERT(!isPrecolored(tmpIndex));
            ASSERT(!m_coloredTmp[tmpIndex]);

            RegisterSet coloredRegisters;
            for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) {
                IndexType aliasTmpIndex = getAlias(adjacentTmpIndex);
                Reg reg = m_coloredTmp[aliasTmpIndex];

                ASSERT(!isPrecolored(aliasTmpIndex) || (isPrecolored(aliasTmpIndex) && reg));

                if (reg)
                    coloredRegisters.set(reg);
            }

            bool colorAssigned = false;
            for (Reg reg : m_regsInPriorityOrder) {
                if (!coloredRegisters.get(reg)) {
                    m_coloredTmp[tmpIndex] = reg;
                    colorAssigned = true;
                    break;
                }
            }

            if (!colorAssigned)
                m_spilledTmps.append(tmpIndex);
        }
        m_selectStack.clear();

        if (m_spilledTmps.isEmpty())
            m_coalescedTmpsAtSpill.clear();
        else
            m_coloredTmp.clear();
    }

private:
    void initializeDegrees(unsigned tmpArraySize)
    {
        m_degrees.resize(tmpArraySize);

        // All precolored registers have an "infinite" degree.
        unsigned firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1;
        for (unsigned i = 0; i < firstNonRegIndex; ++i)
            m_degrees[i] = std::numeric_limits<unsigned>::max();

        memset(m_degrees.data() + firstNonRegIndex, 0, (tmpArraySize - firstNonRegIndex) * sizeof(unsigned));
    }

    void addEdgeDistinct(IndexType a, IndexType b)
    {
        ASSERT(a != b);
        if (m_interferenceEdges.add(InterferenceEdge(a, b)).isNewEntry) {
            if (!isPrecolored(a)) {
                ASSERT(!m_adjacencyList[a].contains(b));
                m_adjacencyList[a].append(b);
                m_degrees[a]++;
            }

            if (!isPrecolored(b)) {
                ASSERT(!m_adjacencyList[b].contains(a));
                m_adjacencyList[b].append(a);
                m_degrees[b]++;
            }
        }
    }

    void decrementDegree(IndexType tmpIndex)
    {
        ASSERT(m_degrees[tmpIndex]);

        unsigned oldDegree = m_degrees[tmpIndex]--;
        if (oldDegree == m_regsInPriorityOrder.size()) {
            enableMovesOnValueAndAdjacents(tmpIndex);
            m_spillWorklist.remove(tmpIndex);
            if (isMoveRelated(tmpIndex))
                m_freezeWorklist.add(tmpIndex);
            else
                m_simplifyWorklist.append(tmpIndex);
        }
    }

    bool addEdgeDistinctWithoutDegreeChange(IndexType a, IndexType b)
    {
        ASSERT(a != b);
        if (m_interferenceEdges.add(InterferenceEdge(a, b)).isNewEntry) {
            if (!isPrecolored(a)) {
                ASSERT(!m_adjacencyList[a].contains(b));
                m_adjacencyList[a].append(b);
            }

            if (!isPrecolored(b)) {
                ASSERT(!m_adjacencyList[b].contains(a));
                m_adjacencyList[b].append(a);
            }
            return true;
        }
        return false;
    }

    bool isMoveRelated(IndexType tmpIndex)
    {
        for (unsigned moveIndex : m_moveList[tmpIndex]) {
            if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex))
                return true;
        }
        return false;
    }

    template<typename Function>
    void forEachAdjacent(IndexType tmpIndex, Function function)
    {
        for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) {
            if (!hasBeenSimplified(adjacentTmpIndex))
                function(adjacentTmpIndex);
        }
    }

    bool hasBeenSimplified(IndexType tmpIndex)
    {
        return m_isOnSelectStack.quickGet(tmpIndex) || !!m_coalescedTmps[tmpIndex];
    }

    template<typename Function>
    void forEachNodeMoves(IndexType tmpIndex, Function function)
    {
        for (unsigned moveIndex : m_moveList[tmpIndex]) {
            if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex))
                function(moveIndex);
        }
    }

    void enableMovesOnValue(IndexType tmpIndex)
    {
        for (unsigned moveIndex : m_moveList[tmpIndex]) {
            if (m_activeMoves.quickClear(moveIndex))
                m_worklistMoves.returnMove(moveIndex);
        }
    }

    void enableMovesOnValueAndAdjacents(IndexType tmpIndex)
    {
        enableMovesOnValue(tmpIndex);

        forEachAdjacent(tmpIndex, [this] (IndexType adjacentTmpIndex) {
            enableMovesOnValue(adjacentTmpIndex);
        });
    }

    bool isPrecolored(IndexType tmpIndex)
    {
        return tmpIndex <= m_lastPrecoloredRegisterIndex;
    }
    void addWorkList(IndexType tmpIndex)
    {
        if (!isPrecolored(tmpIndex) && m_degrees[tmpIndex] < m_regsInPriorityOrder.size() && !isMoveRelated(tmpIndex)) {
            m_freezeWorklist.remove(tmpIndex);
            m_simplifyWorklist.append(tmpIndex);
        }
    }

    void combine(IndexType u, IndexType v)
    {
        if (!m_freezeWorklist.remove(v))
            m_spillWorklist.remove(v);

        ASSERT(!m_coalescedTmps[v]);
        m_coalescedTmps[v] = u;

        auto& vMoves = m_moveList[v];
        m_moveList[u].add(vMoves.begin(), vMoves.end());

        forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) {
            if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) {
                // If we added a new edge between the adjacentTmp and u, it replaces the edge
                // that existed with v.
                // The degree of adjacentTmp remains the same since the edge just changed from v to u.
                // All we need to do is update the degree of u.
                if (!isPrecolored(u))
                    m_degrees[u]++;
            } else {
                // If we already had an edge between the adjacentTmp and u, the degree of u
                // is already correct. The degree of the adjacentTmp decreases since the edge
                // with v is no longer relevant (we can think of it as merged with the edge with u).
                decrementDegree(adjacentTmpIndex);
            }
        });

        if (m_framePointerIndex && m_interferesWithFramePointer.quickGet(v))
            m_interferesWithFramePointer.quickSet(u);

        if (m_degrees[u] >= m_regsInPriorityOrder.size() && m_freezeWorklist.remove(u))
            addToSpill(u);
    }

    bool canBeSafelyCoalesced(IndexType u, IndexType v)
    {
        ASSERT(!isPrecolored(v));
        if (isPrecolored(u))
            return precoloredCoalescingHeuristic(u, v);
        return conservativeHeuristic(u, v);
    }

    bool conservativeHeuristic(IndexType u, IndexType v)
    {
        // This is using Briggs' conservative coalescing rule:
        // If the number of combined adjacent nodes with a degree >= K is less than K,
        // it is safe to combine the two nodes. The reason is that we know that if the graph
        // is colorable, we have fewer than K high-degree adjacents and there is a color
        // left for the current node.
        ASSERT(u != v);
        ASSERT(!isPrecolored(u));
        ASSERT(!isPrecolored(v));

        const auto& adjacentsOfU = m_adjacencyList[u];
        const auto& adjacentsOfV = m_adjacencyList[v];

        if (adjacentsOfU.size() + adjacentsOfV.size() < m_regsInPriorityOrder.size()) {
            // Shortcut: if the total number of adjacents is less than the number of registers, the condition is always met.
            return true;
        }

        HashSet<IndexType> highOrderAdjacents;

        for (IndexType adjacentTmpIndex : adjacentsOfU) {
            ASSERT(adjacentTmpIndex != v);
            ASSERT(adjacentTmpIndex != u);
            if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size()) {
                auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
                if (addResult.isNewEntry && highOrderAdjacents.size() >= m_regsInPriorityOrder.size())
                    return false;
            }
        }
        for (IndexType adjacentTmpIndex : adjacentsOfV) {
            ASSERT(adjacentTmpIndex != u);
            ASSERT(adjacentTmpIndex != v);
            if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size()) {
                auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
                if (addResult.isNewEntry && highOrderAdjacents.size() >= m_regsInPriorityOrder.size())
                    return false;
            }
        }

        ASSERT(highOrderAdjacents.size() < m_regsInPriorityOrder.size());
        return true;
    }

    bool precoloredCoalescingHeuristic(IndexType u, IndexType v)
    {
        ASSERT(isPrecolored(u));
        ASSERT(!isPrecolored(v));

        // If any adjacent of the non-colored node is not an adjacent of the colored node AND has a degree >= K,
        // there is a risk that this node needs to have the same color as our precolored node. If we coalesce
        // such a move, we may create an uncolorable graph.
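        // (This is essentially George's coalescing test, specialized to the case where u is precolored.)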
        const auto& adjacentsOfV = m_adjacencyList[v];
        for (unsigned adjacentTmpIndex : adjacentsOfV) {
            if (!isPrecolored(adjacentTmpIndex)
                && !hasBeenSimplified(adjacentTmpIndex)
                && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size()
                && !m_interferenceEdges.contains(InterferenceEdge(u, adjacentTmpIndex)))
                return false;
        }
        return true;
    }

protected:
#if PLATFORM(COCOA)
#pragma mark -
#endif

    // Interference edges are not directed. An edge between any two Tmps is represented
    // by the concatenated values of the smaller Tmp followed by the bigger Tmp.
    class InterferenceEdge {
    public:
        InterferenceEdge()
        {
        }

        InterferenceEdge(IndexType a, IndexType b)
        {
            ASSERT(a);
            ASSERT(b);
            ASSERT_WITH_MESSAGE(a != b, "A Tmp can never interfere with itself. Doing so would force it to be the superposition of two registers.");

            if (b < a)
                std::swap(a, b);
            m_value = static_cast<uint64_t>(a) << 32 | b;
        }

        InterferenceEdge(WTF::HashTableDeletedValueType)
            : m_value(std::numeric_limits<uint64_t>::max())
        {
        }

        IndexType first() const
        {
            return m_value >> 32 & 0xffffffff;
        }

        IndexType second() const
        {
            return m_value & 0xffffffff;
        }

        bool operator==(const InterferenceEdge other) const
        {
            return m_value == other.m_value;
        }

        bool isHashTableDeletedValue() const
        {
            return *this == InterferenceEdge(WTF::HashTableDeletedValue);
        }

        unsigned hash() const
        {
            return WTF::IntHash<uint64_t>::hash(m_value);
        }

        void dump(PrintStream& out) const
        {
            out.print(first(), "<=>", second());
        }

    private:
        uint64_t m_value { 0 };
    };

    struct InterferenceEdgeHash {
        static unsigned hash(const InterferenceEdge& key) { return key.hash(); }
        static bool equal(const InterferenceEdge& a, const InterferenceEdge& b) { return a == b; }
        static const bool safeToCompareToEmptyOrDeleted = true;
    };
    typedef SimpleClassHashTraits<InterferenceEdge> InterferenceEdgeHashTraits;

    const Vector<Reg>& m_regsInPriorityOrder;
    IndexType m_lastPrecoloredRegisterIndex { 0 };

    // The interference graph.
    HashSet<InterferenceEdge, InterferenceEdgeHash, InterferenceEdgeHashTraits> m_interferenceEdges;
    Vector<Vector<IndexType, 0, UnsafeVectorOverflow>, 0, UnsafeVectorOverflow> m_adjacencyList;
    Vector<unsigned, 0, UnsafeVectorOverflow> m_degrees;

    // Instead of keeping track of the move instructions, we just keep their operands around and use the index
    // in the vector as the "identifier" for the move.
    struct MoveOperands {
        IndexType srcIndex;
        IndexType dstIndex;
    };
    Vector<MoveOperands, 0, UnsafeVectorOverflow> m_coalescingCandidates;

    // List of every move instruction associated with a Tmp.
    Vector<HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>>> m_moveList;

    // Colors.
    Vector<Reg, 0, UnsafeVectorOverflow> m_coloredTmp;
    Vector<IndexType> m_spilledTmps;

    // Values that have been coalesced with another value.
    Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmps;

    // The stack of Tmps removed from the graph and ready for coloring.
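    // m_isOnSelectStack mirrors the contents of m_selectStack so hasBeenSimplified() can answer membership in O(1).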
    BitVector m_isOnSelectStack;
    Vector<IndexType> m_selectStack;

    IndexType m_framePointerIndex { 0 };
    BitVector m_interferesWithFramePointer;

    struct OrderedMoveSet {
        unsigned addMove()
        {
            ASSERT(m_lowPriorityMoveList.isEmpty());
            ASSERT(!m_firstLowPriorityMoveIndex);

            unsigned nextIndex = m_positionInMoveList.size();
            unsigned position = m_moveList.size();
            m_moveList.append(nextIndex);
            m_positionInMoveList.append(position);
            return nextIndex;
        }

        void startAddingLowPriorityMoves()
        {
            ASSERT(m_lowPriorityMoveList.isEmpty());
            m_firstLowPriorityMoveIndex = m_moveList.size();
        }

        unsigned addLowPriorityMove()
        {
            ASSERT(m_firstLowPriorityMoveIndex == m_moveList.size());

            unsigned nextIndex = m_positionInMoveList.size();
            unsigned position = m_lowPriorityMoveList.size();
            m_lowPriorityMoveList.append(nextIndex);
            m_positionInMoveList.append(position);

            ASSERT(nextIndex >= m_firstLowPriorityMoveIndex);
            return nextIndex;
        }

        bool isEmpty() const
        {
            return m_moveList.isEmpty() && m_lowPriorityMoveList.isEmpty();
        }

        bool contains(unsigned index)
        {
            return m_positionInMoveList[index] != std::numeric_limits<unsigned>::max();
        }

        void takeMove(unsigned moveIndex)
        {
            unsigned positionInMoveList = m_positionInMoveList[moveIndex];
            if (positionInMoveList == std::numeric_limits<unsigned>::max())
                return;

            if (moveIndex < m_firstLowPriorityMoveIndex) {
                ASSERT(m_moveList[positionInMoveList] == moveIndex);
                unsigned lastIndex = m_moveList.last();
                m_positionInMoveList[lastIndex] = positionInMoveList;
                m_moveList[positionInMoveList] = lastIndex;
                m_moveList.removeLast();
            } else {
                ASSERT(m_lowPriorityMoveList[positionInMoveList] == moveIndex);
                unsigned lastIndex = m_lowPriorityMoveList.last();
                m_positionInMoveList[lastIndex] = positionInMoveList;
                m_lowPriorityMoveList[positionInMoveList] = lastIndex;
                m_lowPriorityMoveList.removeLast();
            }

            m_positionInMoveList[moveIndex] = std::numeric_limits<unsigned>::max();

            ASSERT(!contains(moveIndex));
        }

        unsigned takeLastMove()
        {
            ASSERT(!isEmpty());

            unsigned lastIndex;
            if (!m_moveList.isEmpty()) {
                lastIndex = m_moveList.takeLast();
                ASSERT(m_positionInMoveList[lastIndex] == m_moveList.size());
            } else {
                lastIndex = m_lowPriorityMoveList.takeLast();
                ASSERT(m_positionInMoveList[lastIndex] == m_lowPriorityMoveList.size());
            }
            m_positionInMoveList[lastIndex] = std::numeric_limits<unsigned>::max();

            ASSERT(!contains(lastIndex));
            return lastIndex;
        }

        void returnMove(unsigned index)
        {
            // This assertion is a bit strict but that is how the move list should be used. The only kind of moves that can
            // return to the list are the ones that we previously failed to coalesce with the conservative heuristics.
            // Values should not be added back if they were never taken out when attempting coalescing.
            ASSERT(!contains(index));

            if (index < m_firstLowPriorityMoveIndex) {
                unsigned position = m_moveList.size();
                m_moveList.append(index);
                m_positionInMoveList[index] = position;
            } else {
                unsigned position = m_lowPriorityMoveList.size();
                m_lowPriorityMoveList.append(index);
                m_positionInMoveList[index] = position;
            }

            ASSERT(contains(index));
        }

        void clear()
        {
            m_positionInMoveList.clear();
            m_moveList.clear();
            m_lowPriorityMoveList.clear();
        }

    private:
        Vector<unsigned, 0, UnsafeVectorOverflow> m_positionInMoveList;
        Vector<unsigned, 0, UnsafeVectorOverflow> m_moveList;
        Vector<unsigned, 0, UnsafeVectorOverflow> m_lowPriorityMoveList;
        unsigned m_firstLowPriorityMoveIndex { 0 };
    };

    // Work lists.
    // Set of "move" enabled for possible coalescing.
    OrderedMoveSet m_worklistMoves;
    // Set of "move" not yet ready for coalescing.
    BitVector m_activeMoves;
    // Low-degree, non-Move related.
    Vector<IndexType> m_simplifyWorklist;
    // High-degree Tmps.
    HashSet<IndexType> m_spillWorklist;
    // Low-degree, Move related.
    HashSet<IndexType> m_freezeWorklist;

    bool m_hasSelectedSpill { false };
    bool m_hasCoalescedNonTrivialMove { false };

    // The mapping of Tmps to their alias for Moves that are always coalescing regardless of spilling.
    Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmpsAtSpill;

    const HashSet<unsigned>& m_unspillableTmps;
};

// This performs all the tasks that are specific to a certain register type.
template<Arg::Type type>
class ColoringAllocator : public AbstractColoringAllocator<unsigned> {
public:
    ColoringAllocator(Code& code, TmpWidth& tmpWidth, const UseCounts<Tmp>& useCounts, const HashSet<unsigned>& unspillableTmp)
        : AbstractColoringAllocator<unsigned>(regsInPriorityOrder(type), AbsoluteTmpMapper<type>::lastMachineRegisterIndex(), tmpArraySize(code), unspillableTmp)
        , m_code(code)
        , m_tmpWidth(tmpWidth)
        , m_useCounts(useCounts)
    {
        if (type == Arg::GP) {
            m_framePointerIndex = AbsoluteTmpMapper<type>::absoluteIndex(Tmp(MacroAssembler::framePointerRegister));
            m_interferesWithFramePointer.ensureSize(tmpArraySize(code));
        }

        initializePrecoloredTmp();
        build();
        allocate();
    }

    Tmp getAlias(Tmp tmp) const
    {
        return AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(getAlias(AbsoluteTmpMapper<type>::absoluteIndex(tmp)));
    }

    // This tells you if a Move will be coalescable if the src and dst end up matching. This method
    // relies on an analysis that is invalidated by register allocation, so it's only meaningful to
    // call this *before* replacing the Tmps in this Inst with registers or spill slots.
    bool mayBeCoalescable(const Inst& inst) const
    {
        return mayBeCoalescableImpl(inst, &m_tmpWidth);
    }

    bool isUselessMove(const Inst& inst) const
    {
        return mayBeCoalescableImpl(inst, nullptr) && inst.args[0].tmp() == inst.args[1].tmp();
    }

    Tmp getAliasWhenSpilling(Tmp tmp) const
    {
        ASSERT_WITH_MESSAGE(!m_spilledTmps.isEmpty(), "This function is only valid for coalescing during spilling.");

        if (m_coalescedTmpsAtSpill.isEmpty())
            return tmp;

        unsigned aliasIndex = AbsoluteTmpMapper<type>::absoluteIndex(tmp);
        while (unsigned nextAliasIndex = m_coalescedTmpsAtSpill[aliasIndex])
            aliasIndex = nextAliasIndex;

        Tmp alias = AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(aliasIndex);

        ASSERT_WITH_MESSAGE(!m_spilledTmps.contains(aliasIndex) || alias == tmp, "The aliases at spill should always be colorable. Something went horribly wrong.");

        return alias;
    }

    template<typename IndexIterator>
    class IndexToTmpIteratorAdaptor {
    public:
        IndexToTmpIteratorAdaptor(IndexIterator&& indexIterator)
            : m_indexIterator(WTFMove(indexIterator))
        {
        }

        Tmp operator*() const { return AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*m_indexIterator); }
        IndexToTmpIteratorAdaptor& operator++() { ++m_indexIterator; return *this; }

        bool operator==(const IndexToTmpIteratorAdaptor& other) const
        {
            return m_indexIterator == other.m_indexIterator;
        }

        bool operator!=(const IndexToTmpIteratorAdaptor& other) const
        {
            return !(*this == other);
        }

    private:
        IndexIterator m_indexIterator;
    };

    template<typename Collection>
    class IndexToTmpIterableAdaptor {
    public:
        IndexToTmpIterableAdaptor(const Collection& collection)
            : m_collection(collection)
        {
        }

        IndexToTmpIteratorAdaptor<typename Collection::const_iterator> begin() const
        {
            return m_collection.begin();
        }

        IndexToTmpIteratorAdaptor<typename Collection::const_iterator> end() const
        {
            return m_collection.end();
        }

    private:
        const Collection& m_collection;
    };

    IndexToTmpIterableAdaptor<Vector<unsigned>> spilledTmps() const { return m_spilledTmps; }

    bool requiresSpilling() const { return !m_spilledTmps.isEmpty(); }

    Reg allocatedReg(Tmp tmp) const
    {
        ASSERT(!tmp.isReg());
        ASSERT(m_coloredTmp.size());
        ASSERT(tmp.isGP() == (type == Arg::GP));

        Reg reg = m_coloredTmp[AbsoluteTmpMapper<type>::absoluteIndex(tmp)];
        if (!reg) {
            // We only care about Tmps that interfere. A Tmp that never interferes with anything
            // can take any register.
            reg = regsInPriorityOrder(type).first();
        }
        return reg;
    }

private:
    static unsigned tmpArraySize(Code& code)
    {
        unsigned numTmps = code.numTmps(type);
        return AbsoluteTmpMapper<type>::absoluteIndex(numTmps);
    }

    void initializePrecoloredTmp()
    {
        m_coloredTmp.resize(m_lastPrecoloredRegisterIndex + 1);
        for (unsigned i = 1; i <= m_lastPrecoloredRegisterIndex; ++i) {
            Tmp tmp = AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(i);
            ASSERT(tmp.isReg());
            m_coloredTmp[i] = tmp.reg();
        }
    }

    bool mayBeCoalesced(Arg left, Arg right)
    {
        if (!left.isTmp() || !right.isTmp())
            return false;

        Tmp leftTmp = left.tmp();
        Tmp rightTmp = right.tmp();

        if (leftTmp == rightTmp)
            return false;

        if (leftTmp.isGP() != (type == Arg::GP) || rightTmp.isGP() != (type == Arg::GP))
            return false;

        unsigned leftIndex = AbsoluteTmpMapper<type>::absoluteIndex(leftTmp);
        unsigned rightIndex = AbsoluteTmpMapper<type>::absoluteIndex(rightTmp);

        return !m_interferenceEdges.contains(InterferenceEdge(leftIndex, rightIndex));
    }

    void addToLowPriorityCoalescingCandidates(Arg left, Arg right)
    {
        ASSERT(mayBeCoalesced(left, right));
        Tmp leftTmp = left.tmp();
        Tmp rightTmp = right.tmp();

        unsigned leftIndex = AbsoluteTmpMapper<type>::absoluteIndex(leftTmp);
        unsigned rightIndex = AbsoluteTmpMapper<type>::absoluteIndex(rightTmp);

        unsigned nextMoveIndex = m_coalescingCandidates.size();
        m_coalescingCandidates.append({ leftIndex, rightIndex });

        unsigned newIndexInWorklist = m_worklistMoves.addLowPriorityMove();
        ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex);

        ASSERT(nextMoveIndex <= m_activeMoves.size());
        m_activeMoves.ensureSize(nextMoveIndex + 1);

        m_moveList[leftIndex].add(nextMoveIndex);
        m_moveList[rightIndex].add(nextMoveIndex);
    }

    void build()
    {
        TmpLiveness<type> liveness(m_code);
        for (BasicBlock* block : m_code) {
            typename TmpLiveness<type>::LocalCalc localCalc(liveness, block);
            for (unsigned instIndex = block->size(); instIndex--;) {
                Inst& inst = block->at(instIndex);
                Inst* nextInst = block->get(instIndex + 1);
                build(&inst, nextInst, localCalc);
                localCalc.execute(instIndex);
            }
            build(nullptr, &block->at(0), localCalc);
        }
        buildLowPriorityMoveList();
    }

    void build(Inst* prevInst, Inst* nextInst, const typename TmpLiveness<type>::LocalCalc& localCalc)
    {
        Inst::forEachDefWithExtraClobberedRegs<Tmp>(
            prevInst, nextInst,
            [&] (const Tmp& arg, Arg::Role, Arg::Type argType, Arg::Width) {
                if (argType != type)
                    return;

                // All the Def()s interfere with each other and with all the extra clobbered Tmps.
                // We should not use forEachDefWithExtraClobberedRegs() here since colored Tmps
                // do not need interference edges in our implementation.
                Inst::forEachDef<Tmp>(
                    prevInst, nextInst,
                    [&] (Tmp& otherArg, Arg::Role, Arg::Type argType, Arg::Width) {
                        if (argType != type)
                            return;
                        this->addEdge(arg, otherArg);
                    });
            });

        if (prevInst && mayBeCoalescable(*prevInst)) {
            // We do not want the Use() of this move to interfere with the Def(), even if it is live
            // after the Move. If we were to add the interference edge, it would be impossible to
            // coalesce the Move even if the two Tmps never interfere anywhere.
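            // Instead, the move is recorded as a coalescing candidate and the Def only gets
            // interference edges with everything live except the Use (see the loop below).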
            Tmp defTmp;
            Tmp useTmp;
            prevInst->forEachTmp([&defTmp, &useTmp] (Tmp& argTmp, Arg::Role role, Arg::Type, Arg::Width) {
                if (Arg::isLateDef(role))
                    defTmp = argTmp;
                else {
                    ASSERT(Arg::isEarlyUse(role));
                    useTmp = argTmp;
                }
            });
            ASSERT(defTmp);
            ASSERT(useTmp);

            unsigned nextMoveIndex = m_coalescingCandidates.size();
            m_coalescingCandidates.append({ AbsoluteTmpMapper<type>::absoluteIndex(useTmp), AbsoluteTmpMapper<type>::absoluteIndex(defTmp) });

            unsigned newIndexInWorklist = m_worklistMoves.addMove();
            ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex);

            ASSERT(nextMoveIndex <= m_activeMoves.size());
            m_activeMoves.ensureSize(nextMoveIndex + 1);

            for (const Arg& arg : prevInst->args) {
                auto& list = m_moveList[AbsoluteTmpMapper<type>::absoluteIndex(arg.tmp())];
                list.add(nextMoveIndex);
            }

            for (const Tmp& liveTmp : localCalc.live()) {
                if (liveTmp != useTmp)
                    addEdge(defTmp, liveTmp);
            }

            // The next instruction could have early clobbers or early defs. We need to consider
            // those now.
            addEdges(nullptr, nextInst, localCalc.live());
        } else
            addEdges(prevInst, nextInst, localCalc.live());
    }

    void buildLowPriorityMoveList()
    {
        if (!isX86())
            return;

        m_worklistMoves.startAddingLowPriorityMoves();
        for (BasicBlock* block : m_code) {
            for (Inst& inst : *block) {
                if (Optional<unsigned> defArgIndex = inst.shouldTryAliasingDef()) {
                    Arg op1 = inst.args[*defArgIndex - 2];
                    Arg op2 = inst.args[*defArgIndex - 1];
                    Arg dest = inst.args[*defArgIndex];

                    if (op1 == dest || op2 == dest)
                        continue;

                    if (mayBeCoalesced(op1, dest))
                        addToLowPriorityCoalescingCandidates(op1, dest);
                    if (op1 != op2 && mayBeCoalesced(op2, dest))
                        addToLowPriorityCoalescingCandidates(op2, dest);
                }
            }
        }
    }

    void addEdges(Inst* prevInst, Inst* nextInst, typename TmpLiveness<type>::LocalCalc::Iterable liveTmps)
    {
        // All the Def()s interfere with everything live.
        Inst::forEachDefWithExtraClobberedRegs<Tmp>(
            prevInst, nextInst,
            [&] (const Tmp& arg, Arg::Role, Arg::Type argType, Arg::Width) {
                if (argType != type)
                    return;

                for (const Tmp& liveTmp : liveTmps) {
                    ASSERT(liveTmp.isGP() == (type == Arg::GP));
                    addEdge(arg, liveTmp);
                }

                if (type == Arg::GP && !arg.isGPR())
                    m_interferesWithFramePointer.quickSet(AbsoluteTmpMapper<type>::absoluteIndex(arg));
            });
    }

    void addEdge(Tmp a, Tmp b)
    {
        ASSERT_WITH_MESSAGE(a.isGP() == b.isGP(), "An interference between registers of different types does not make sense, it can lead to non-colorable graphs.");

        addEdge(AbsoluteTmpMapper<type>::absoluteIndex(a), AbsoluteTmpMapper<type>::absoluteIndex(b));
    }

    // Calling this without a tmpWidth will perform a more conservative coalescing analysis that assumes
    // that Move32's are not coalescable.
    static bool mayBeCoalescableImpl(const Inst& inst, TmpWidth* tmpWidth)
    {
        switch (type) {
        case Arg::GP:
            switch (inst.opcode) {
            case Move:
            case Move32:
                break;
            default:
                return false;
            }
            break;
        case Arg::FP:
            switch (inst.opcode) {
            case MoveFloat:
            case MoveDouble:
                break;
            default:
                return false;
            }
            break;
        }

        ASSERT_WITH_MESSAGE(inst.args.size() == 2, "We assume coalescable moves only have two arguments in a few places.");

        if (!inst.args[0].isTmp() || !inst.args[1].isTmp())
            return false;

        ASSERT(inst.args[0].type() == type);
        ASSERT(inst.args[1].type() == type);

        // We can coalesce a Move32 so long as either of the following holds:
        // - The input is already zero-filled.
        // - The output only cares about the low 32 bits.
        //
        // Note that the input property requires an analysis over ZDef's, so it's only valid so long
        // as the input gets a register.
        // We don't know if the input gets a register, but we do know
        // that if it doesn't get a register then we will still emit this Move32.
        if (inst.opcode == Move32) {
            if (!tmpWidth)
                return false;

            if (tmpWidth->defWidth(inst.args[0].tmp()) > Arg::Width32
                && tmpWidth->useWidth(inst.args[1].tmp()) > Arg::Width32)
                return false;
        }

        return true;
    }

    void selectSpill()
    {
        if (!m_hasSelectedSpill) {
            m_hasSelectedSpill = true;

            if (m_hasCoalescedNonTrivialMove)
                m_coalescedTmpsAtSpill = m_coalescedTmps;
        }

        auto iterator = m_spillWorklist.begin();

        RELEASE_ASSERT_WITH_MESSAGE(iterator != m_spillWorklist.end(), "selectSpill() called when there was no spill.");
        RELEASE_ASSERT_WITH_MESSAGE(!m_unspillableTmps.contains(*iterator), "trying to spill unspillable tmp");

        // Higher score means more desirable to spill. Lower scores maximize the likelihood that a tmp
        // gets a register.
        auto score = [&] (Tmp tmp) -> double {
            // Air exposes the concept of "fast tmps", and we interpret that to mean that the tmp
            // should always be in a register.
            if (m_code.isFastTmp(tmp))
                return 0;

            // All else being equal, the score should be directly related to the degree.
            double degree = static_cast<double>(m_degrees[AbsoluteTmpMapper<type>::absoluteIndex(tmp)]);

            // All else being equal, the score should be inversely related to the number of warm uses and
            // defs.
            const UseCounts<Tmp>::Counts* counts = m_useCounts[tmp];
            if (!counts)
                return std::numeric_limits<double>::infinity();

            double uses = counts->numWarmUses + counts->numDefs;

            // If it's a constant, then it's not as bad to spill. We can rematerialize it in many
            // cases.
            if (counts->numConstDefs == counts->numDefs)
                uses /= 2;

            return degree / uses;
        };

        auto victimIterator = iterator;
        double maxScore = score(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*iterator));

        ++iterator;
        for (; iterator != m_spillWorklist.end(); ++iterator) {
            double tmpScore = score(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*iterator));
            if (tmpScore > maxScore) {
                ASSERT(!m_unspillableTmps.contains(*iterator));
                victimIterator = iterator;
                maxScore = tmpScore;
            }
        }

        unsigned victimIndex = *victimIterator;
        m_spillWorklist.remove(victimIterator);
        m_simplifyWorklist.append(victimIndex);
        freezeMoves(victimIndex);
    }

    void allocate()
    {
        ASSERT_WITH_MESSAGE(m_activeMoves.size() >= m_coalescingCandidates.size(), "The activeMove set should be big enough for the quick operations of BitVector.");

        makeWorkList();

        if (debug) {
            dataLog("Interference: ", listDump(m_interferenceEdges), "\n");
            dumpInterferenceGraphInDot(WTF::dataFile());
            dataLog("Coalescing candidates:\n");
            for (MoveOperands& moveOp : m_coalescingCandidates) {
                dataLog("    ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(moveOp.srcIndex),
                    " -> ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(moveOp.dstIndex), "\n");
            }
            dataLog("Initial work list\n");
            dumpWorkLists(WTF::dataFile());
        }

        do {
            if (traceDebug) {
                dataLog("Before Graph simplification iteration\n");
                dumpWorkLists(WTF::dataFile());
            }

            if (!m_simplifyWorklist.isEmpty())
                simplify();
            else if (!m_worklistMoves.isEmpty())
                coalesce();
            else if (!m_freezeWorklist.isEmpty())
                freeze();
            else if (!m_spillWorklist.isEmpty())
                selectSpill();

            if (traceDebug) {
                dataLog("After Graph simplification iteration\n");
                dumpWorkLists(WTF::dataFile());
            }
        } while (!m_simplifyWorklist.isEmpty() || !m_worklistMoves.isEmpty() || !m_freezeWorklist.isEmpty() || !m_spillWorklist.isEmpty());

        assignColors();
    }

#if PLATFORM(COCOA)
#pragma mark - Debugging helpers.
#endif

    void dumpInterferenceGraphInDot(PrintStream& out)
    {
        out.print("graph InterferenceGraph { \n");

        HashSet<Tmp> tmpsWithInterferences;
        for (const auto& edge : m_interferenceEdges) {
            tmpsWithInterferences.add(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(edge.first()));
            tmpsWithInterferences.add(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(edge.second()));
        }

        for (const auto& tmp : tmpsWithInterferences) {
            unsigned tmpIndex = AbsoluteTmpMapper<type>::absoluteIndex(tmp);
            if (tmpIndex < m_degrees.size())
                out.print("    ", tmp.internalValue(), " [label=\"", tmp, " (", m_degrees[tmpIndex], ")\"];\n");
            else
                out.print("    ", tmp.internalValue(), " [label=\"", tmp, "\"];\n");
        }

        for (const auto& edge : m_interferenceEdges)
            out.print("    ", edge.first(), " -- ", edge.second(), ";\n");
        out.print("}\n");
    }

    void dumpWorkLists(PrintStream& out)
    {
        out.print("Simplify work list:\n");
        for (unsigned tmpIndex : m_simplifyWorklist)
            out.print("    ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n");
        out.printf("Moves work list is empty? %d\n", m_worklistMoves.isEmpty());
        out.print("Freeze work list:\n");
        for (unsigned tmpIndex : m_freezeWorklist)
            out.print("    ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n");
        out.print("Spill work list:\n");
        for (unsigned tmpIndex : m_spillWorklist)
            out.print("    ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n");
    }

    using AbstractColoringAllocator<unsigned>::addEdge;
    using AbstractColoringAllocator<unsigned>::getAlias;

    Code& m_code;
    TmpWidth& m_tmpWidth;
    // FIXME: spilling should not be type specific. It is only a side effect of using UseCounts.
    const UseCounts<Tmp>& m_useCounts;
};

class IteratedRegisterCoalescing {
public:
    IteratedRegisterCoalescing(Code& code)
        : m_code(code)
        , m_useCounts(code)
    {
    }

    void run()
    {
        iteratedRegisterCoalescingOnType<Arg::GP>();
        iteratedRegisterCoalescingOnType<Arg::FP>();

        if (reportStats)
            dataLog("Num iterations = ", m_numIterations, "\n");
    }

private:
    template<Arg::Type type>
    void iteratedRegisterCoalescingOnType()
    {
        HashSet<unsigned> unspillableTmps = computeUnspillableTmps<type>();

        // FIXME: If a Tmp is used only from a Scratch role and that argument is !admitsStack, then
        // we should add the Tmp to unspillableTmps. That will help avoid relooping only to turn the
        // Tmp into an unspillable Tmp.
        // https://bugs.webkit.org/show_bug.cgi?id=152699

        while (true) {
            ++m_numIterations;

            if (traceDebug)
                dataLog("Code at iteration ", m_numIterations, ":\n", m_code);

            // FIXME: One way to optimize this code is to remove the recomputation inside the fixpoint.
            // We need to recompute because spilling adds tmps, but we could just update tmpWidth when we
            // add those tmps. Note that one easy way to remove the recomputation is to make any newly
            // added Tmps get the same use/def widths that the original Tmp got. But, this may hurt the
            // spill code we emit. Since we currently recompute TmpWidth after spilling, the newly
            // created Tmps may get narrower use/def widths. On the other hand, the spiller already
            // selects which move instruction to use based on the original Tmp's widths, so it may not
            // matter that a subsequent iteration sees a conservative width for the new Tmps. Also, the
            // recomputation may not actually be a performance problem; it's likely that a better way to
            // improve performance of TmpWidth is to replace its HashMap with something else. It's
            // possible that most of the TmpWidth overhead is from queries of TmpWidth rather than the
            // recomputation, in which case speeding up the lookup would be a bigger win.
            // https://bugs.webkit.org/show_bug.cgi?id=152478
            m_tmpWidth.recompute(m_code);

            ColoringAllocator<type> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps);
            if (!allocator.requiresSpilling()) {
                assignRegistersToTmp(allocator);
                if (traceDebug)
                    dataLog("Successful allocation at iteration ", m_numIterations, ":\n", m_code);

                return;
            }
            addSpillAndFill(allocator, unspillableTmps);
        }
    }

    template<Arg::Type type>
    HashSet<unsigned> computeUnspillableTmps()
    {
        HashSet<unsigned> unspillableTmps;

        struct Range {
            unsigned first { std::numeric_limits<unsigned>::max() };
            unsigned last { 0 };
            unsigned count { 0 };
            unsigned admitStackCount { 0 };
        };

        unsigned numTmps = m_code.numTmps(type);
        unsigned arraySize = AbsoluteTmpMapper<type>::absoluteIndex(numTmps);

        Vector<Range> ranges;
        ranges.fill(Range(), arraySize);

        unsigned globalIndex = 0;
        for (BasicBlock* block : m_code) {
            for (Inst& inst : *block) {
                inst.forEachArg([&] (Arg& arg, Arg::Role, Arg::Type argType, Arg::Width) {
                    if (arg.isTmp() && inst.admitsStack(arg)) {
                        if (argType != type)
                            return;

                        Tmp tmp = arg.tmp();
                        Range& range = ranges[AbsoluteTmpMapper<type>::absoluteIndex(tmp)];
                        range.count++;
                        range.admitStackCount++;
                        if (globalIndex < range.first) {
                            range.first = globalIndex;
                            range.last = globalIndex;
                        } else
                            range.last = globalIndex;

                        return;
                    }

                    arg.forEachTmpFast([&] (Tmp& tmp) {
                        if (tmp.isGP() != (type == Arg::GP))
                            return;

                        Range& range = ranges[AbsoluteTmpMapper<type>::absoluteIndex(tmp)];
                        range.count++;
                        if (globalIndex < range.first) {
                            range.first = globalIndex;
                            range.last = globalIndex;
                        } else
                            range.last = globalIndex;
                    });
                });

                ++globalIndex;
            }
            ++globalIndex;
        }
        for (unsigned i = AbsoluteTmpMapper<type>::lastMachineRegisterIndex() + 1; i < ranges.size(); ++i) {
            Range& range = ranges[i];
            if (range.last - range.first <= 1 && range.count > range.admitStackCount)
                unspillableTmps.add(i);
        }

        return unspillableTmps;
    }

    template<Arg::Type type>
    void assignRegistersToTmp(const ColoringAllocator<type>& allocator)
    {
        for (BasicBlock* block : m_code) {
            // Give Tmps a valid register.
            for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
                Inst& inst = block->at(instIndex);

                // The mayBeCoalescable() method will change its mind for some operations after we
                // complete register allocation. So, we record this before starting.
                bool mayBeCoalescable = allocator.mayBeCoalescable(inst);

                // Move32 is cheaper if we know that it's equivalent to a Move. It's
                // equivalent if the destination's high bits are not observable or if the source's high
                // bits are all zero. Note that we don't have the opposite optimization for other
                // architectures, which may prefer Move over Move32, because Move is canonical already.
                if (type == Arg::GP && inst.opcode == Move
                    && inst.args[0].isTmp() && inst.args[1].isTmp()) {
                    if (m_tmpWidth.useWidth(inst.args[1].tmp()) <= Arg::Width32
                        || m_tmpWidth.defWidth(inst.args[0].tmp()) <= Arg::Width32)
                        inst.opcode = Move32;
                }

                inst.forEachTmpFast([&] (Tmp& tmp) {
                    if (tmp.isReg() || tmp.isGP() == (type != Arg::GP))
                        return;

                    Tmp aliasTmp = allocator.getAlias(tmp);
                    Tmp assignedTmp;
                    if (aliasTmp.isReg())
                        assignedTmp = Tmp(aliasTmp.reg());
                    else {
                        auto reg = allocator.allocatedReg(aliasTmp);
                        ASSERT(reg);
                        assignedTmp = Tmp(reg);
                    }
                    ASSERT(assignedTmp.isReg());
                    tmp = assignedTmp;
                });

                if (mayBeCoalescable && inst.args[0].isTmp() && inst.args[1].isTmp()
                    && inst.args[0].tmp() == inst.args[1].tmp())
                    inst = Inst();
            }

            // Remove all the useless moves we created in this block.
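            // (The coalesced self-moves were turned into empty Insts above, so filtering on !inst drops them.)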
block->insts().removeAllMatching([&] (const Inst& inst) { return !inst; }); } } template void addSpillAndFill(const ColoringAllocator& allocator, HashSet& unspillableTmps) { HashMap stackSlots; for (Tmp tmp : allocator.spilledTmps()) { // All the spilled values become unspillable. unspillableTmps.add(AbsoluteTmpMapper::absoluteIndex(tmp)); // Allocate stack slot for each spilled value. StackSlot* stackSlot = m_code.addStackSlot( m_tmpWidth.width(tmp) <= Arg::Width32 ? 4 : 8, StackSlotKind::Spill); bool isNewTmp = stackSlots.add(tmp, stackSlot).isNewEntry; ASSERT_UNUSED(isNewTmp, isNewTmp); } // Rewrite the program to get rid of the spilled Tmp. InsertionSet insertionSet(m_code); for (BasicBlock* block : m_code) { bool hasAliasedTmps = false; for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { Inst& inst = block->at(instIndex); // The TmpWidth analysis will say that a Move only stores 32 bits into the destination, // if the source only had 32 bits worth of non-zero bits. Same for the source: it will // only claim to read 32 bits from the source if only 32 bits of the destination are // read. Note that we only apply this logic if this turns into a load or store, since // Move is the canonical way to move data between GPRs. bool forceMove32IfDidSpill = false; bool didSpill = false; if (type == Arg::GP && inst.opcode == Move) { if ((inst.args[0].isTmp() && m_tmpWidth.width(inst.args[0].tmp()) <= Arg::Width32) || (inst.args[1].isTmp() && m_tmpWidth.width(inst.args[1].tmp()) <= Arg::Width32)) forceMove32IfDidSpill = true; } // Try to replace the register use by memory use when possible. inst.forEachArg( [&] (Arg& arg, Arg::Role, Arg::Type argType, Arg::Width width) { if (arg.isTmp() && argType == type && !arg.isReg()) { auto stackSlotEntry = stackSlots.find(arg.tmp()); if (stackSlotEntry != stackSlots.end() && inst.admitsStack(arg)) { stackSlotEntry->value->ensureSize( forceMove32IfDidSpill ? 4 : Arg::bytes(width)); arg = Arg::stack(stackSlotEntry->value); didSpill = true; } } }); if (didSpill && forceMove32IfDidSpill) inst.opcode = Move32; // For every other case, add Load/Store as needed. inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type argType, Arg::Width) { if (tmp.isReg() || argType != type) return; auto stackSlotEntry = stackSlots.find(tmp); if (stackSlotEntry == stackSlots.end()) { Tmp alias = allocator.getAliasWhenSpilling(tmp); if (alias != tmp) { tmp = alias; hasAliasedTmps = true; } return; } Arg arg = Arg::stack(stackSlotEntry->value); Opcode move = Oops; switch (stackSlotEntry->value->byteSize()) { case 4: move = type == Arg::GP ? Move32 : MoveFloat; break; case 8: move = type == Arg::GP ? 
                        break;
                    default:
                        RELEASE_ASSERT_NOT_REACHED();
                        break;
                    }

                    tmp = m_code.newTmp(type);
                    unspillableTmps.add(AbsoluteTmpMapper<type>::absoluteIndex(tmp));

                    if (Arg::isAnyUse(role) && role != Arg::Scratch)
                        insertionSet.insert(instIndex, move, inst.origin, arg, tmp);
                    if (Arg::isAnyDef(role))
                        insertionSet.insert(instIndex + 1, move, inst.origin, tmp, arg);
                });
            }
            insertionSet.execute(block);

            if (hasAliasedTmps) {
                block->insts().removeAllMatching([&] (const Inst& inst) {
                    return allocator.isUselessMove(inst);
                });
            }
        }
    }

    Code& m_code;
    TmpWidth m_tmpWidth;
    UseCounts<Tmp> m_useCounts;
    unsigned m_numIterations { 0 };
};

} // anonymous namespace

void iteratedRegisterCoalescing(Code& code)
{
    PhaseScope phaseScope(code, "iteratedRegisterCoalescing");

    IteratedRegisterCoalescing iteratedRegisterCoalescing(code);
    iteratedRegisterCoalescing.run();
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)