diff options
Diffstat (limited to 'Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp')
| -rw-r--r-- | Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp new file mode 100644 index 000000000..b3d5d0b71 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirFixPartialRegisterStalls.h" + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInst.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include "MacroAssembler.h" +#include <wtf/IndexMap.h> +#include <wtf/IndexSet.h> +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool hasPartialXmmRegUpdate(const Inst& inst) +{ + switch (inst.kind.opcode) { + case ConvertDoubleToFloat: + case ConvertFloatToDouble: + case ConvertInt32ToDouble: + case ConvertInt64ToDouble: + case ConvertInt32ToFloat: + case ConvertInt64ToFloat: + case SqrtDouble: + case SqrtFloat: + case CeilDouble: + case CeilFloat: + case FloorDouble: + case FloorFloat: + return true; + default: + break; + } + return false; +} + +bool isDependencyBreaking(const Inst& inst) +{ + // "xorps reg, reg" is used by the frontend to remove the dependency on its argument. + return inst.kind.opcode == MoveZeroToDouble; +} + +// FIXME: find a good distance per architecture experimentally. +// LLVM uses a distance of 16 but that comes from Nehalem. +unsigned char minimumSafeDistance = 16; + +struct FPDefDistance { + FPDefDistance() + { + for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) + distance[i] = 255; + } + + void reset(FPRReg reg) + { + unsigned index = MacroAssembler::fpRegisterIndex(reg); + distance[index] = 255; + } + + void add(FPRReg reg, unsigned registerDistance) + { + unsigned index = MacroAssembler::fpRegisterIndex(reg); + if (registerDistance < distance[index]) + distance[index] = static_cast<unsigned char>(registerDistance); + } + + bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0) + { + bool changed = false; + for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) { + unsigned regDistance = precessorDistance.distance[i] + constantOffset; + if (regDistance < minimumSafeDistance && regDistance < distance[i]) { + distance[i] = regDistance; + changed = true; + } + } + return changed; + } + + unsigned char distance[MacroAssembler::numberOfFPRegisters()]; +}; + +void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd) +{ + --distanceToBlockEnd; + + if (isDependencyBreaking(inst)) { + localDistance.reset(inst.args[0].tmp().fpr()); + return; + } + + inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { + ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation."); + + if (tmp.isFPR() && Arg::isAnyDef(role)) + localDistance.add(tmp.fpr(), distanceToBlockEnd); + }); +} + +} + +void fixPartialRegisterStalls(Code& code) +{ + if (!isX86()) + return; + + PhaseScope phaseScope(code, "fixPartialRegisterStalls"); + + Vector<BasicBlock*> candidates; + + for (BasicBlock* block : code) { + for (const Inst& inst : *block) { + if (hasPartialXmmRegUpdate(inst)) { + candidates.append(block); + break; + } + } + } + + // Fortunately, Partial Stalls are rarely used. Return early if no block + // cares about them. + if (candidates.isEmpty()) + return; + + // For each block, this provides the distance to the last instruction setting each register + // on block *entry*. + IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size()); + + // Blocks with dirty distance at head. + IndexSet<BasicBlock> dirty; + + // First, we compute the local distance for each block and push it to the successors. + for (BasicBlock* block : code) { + FPDefDistance localDistance; + + unsigned distanceToBlockEnd = block->size(); + for (Inst& inst : *block) + updateDistances(inst, localDistance, distanceToBlockEnd); + + for (BasicBlock* successor : block->successorBlocks()) { + if (lastDefDistance[successor].updateFromPrecessor(localDistance)) + dirty.add(successor); + } + } + + // Now we propagate the minimums accross blocks. + bool changed; + do { + changed = false; + + for (BasicBlock* block : code) { + if (!dirty.remove(block)) + continue; + + // Little shortcut: if the block is big enough, propagating it won't add any information. + if (block->size() >= minimumSafeDistance) + continue; + + unsigned blockSize = block->size(); + FPDefDistance& blockDistance = lastDefDistance[block]; + for (BasicBlock* successor : block->successorBlocks()) { + if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) { + dirty.add(successor); + changed = true; + } + } + } + } while (changed); + + // Finally, update each block as needed. + InsertionSet insertionSet(code); + for (BasicBlock* block : candidates) { + unsigned distanceToBlockEnd = block->size(); + FPDefDistance& localDistance = lastDefDistance[block]; + + for (unsigned i = 0; i < block->size(); ++i) { + Inst& inst = block->at(i); + + if (hasPartialXmmRegUpdate(inst)) { + RegisterSet defs; + RegisterSet uses; + inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { + if (tmp.isFPR()) { + if (Arg::isAnyDef(role)) + defs.set(tmp.fpr()); + if (Arg::isAnyUse(role)) + uses.set(tmp.fpr()); + } + }); + // We only care about values we define but not use. Otherwise we have to wait + // for the value to be resolved anyway. + defs.exclude(uses); + + defs.forEach([&] (Reg reg) { + if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance) + insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg)); + }); + } + + updateDistances(inst, localDistance, distanceToBlockEnd); + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) |
