/* * Copyright (C) 2015-2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "AirFixPartialRegisterStalls.h" #if ENABLE(B3_JIT) #include "AirBasicBlock.h" #include "AirCode.h" #include "AirInsertionSet.h" #include "AirInst.h" #include "AirInstInlines.h" #include "AirPhaseScope.h" #include "B3IndexMap.h" #include "B3IndexSet.h" #include "MacroAssembler.h" #include namespace JSC { namespace B3 { namespace Air { namespace { bool hasPartialXmmRegUpdate(const Inst& inst) { switch (inst.opcode) { case ConvertDoubleToFloat: case ConvertFloatToDouble: case ConvertInt32ToDouble: case ConvertInt64ToDouble: case SqrtDouble: case SqrtFloat: case CeilDouble: case CeilFloat: case FloorDouble: case FloorFloat: return true; default: break; } return false; } bool isDependencyBreaking(const Inst& inst) { // "xorps reg, reg" is used by the frontend to remove the dependency on its argument. return inst.opcode == MoveZeroToDouble; } // FIXME: find a good distance per architecture experimentally. // LLVM uses a distance of 16 but that comes from Nehalem. unsigned char minimumSafeDistance = 16; struct FPDefDistance { FPDefDistance() { for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) distance[i] = 255; } void reset(FPRReg reg) { unsigned index = MacroAssembler::fpRegisterIndex(reg); distance[index] = 255; } void add(FPRReg reg, unsigned registerDistance) { unsigned index = MacroAssembler::fpRegisterIndex(reg); if (registerDistance < distance[index]) distance[index] = static_cast(registerDistance); } bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0) { bool changed = false; for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) { unsigned regDistance = precessorDistance.distance[i] + constantOffset; if (regDistance < minimumSafeDistance && regDistance < distance[i]) { distance[i] = regDistance; changed = true; } } return changed; } unsigned char distance[MacroAssembler::numberOfFPRegisters()]; }; void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd) { --distanceToBlockEnd; if (isDependencyBreaking(inst)) { localDistance.reset(inst.args[0].tmp().fpr()); return; } inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation."); if (tmp.isFPR() && Arg::isAnyDef(role)) localDistance.add(tmp.fpr(), distanceToBlockEnd); }); } } void fixPartialRegisterStalls(Code& code) { if (!isX86()) return; PhaseScope phaseScope(code, "fixPartialRegisterStalls"); Vector candidates; for (BasicBlock* block : code) { for (const Inst& inst : *block) { if (hasPartialXmmRegUpdate(inst)) { candidates.append(block); break; } } } // Fortunately, Partial Stalls are rarely used. Return early if no block // cares about them. if (candidates.isEmpty()) return; // For each block, this provides the distance to the last instruction setting each register // on block *entry*. IndexMap lastDefDistance(code.size()); // Blocks with dirty distance at head. IndexSet dirty; // First, we compute the local distance for each block and push it to the successors. for (BasicBlock* block : code) { FPDefDistance localDistance; unsigned distanceToBlockEnd = block->size(); for (Inst& inst : *block) updateDistances(inst, localDistance, distanceToBlockEnd); for (BasicBlock* successor : block->successorBlocks()) { if (lastDefDistance[successor].updateFromPrecessor(localDistance)) dirty.add(successor); } } // Now we propagate the minimums accross blocks. bool changed; do { changed = false; for (BasicBlock* block : code) { if (!dirty.remove(block)) continue; // Little shortcut: if the block is big enough, propagating it won't add any information. if (block->size() >= minimumSafeDistance) continue; unsigned blockSize = block->size(); FPDefDistance& blockDistance = lastDefDistance[block]; for (BasicBlock* successor : block->successorBlocks()) { if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) { dirty.add(successor); changed = true; } } } } while (changed); // Finally, update each block as needed. InsertionSet insertionSet(code); for (BasicBlock* block : candidates) { unsigned distanceToBlockEnd = block->size(); FPDefDistance& localDistance = lastDefDistance[block]; for (unsigned i = 0; i < block->size(); ++i) { Inst& inst = block->at(i); if (hasPartialXmmRegUpdate(inst)) { RegisterSet defs; RegisterSet uses; inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { if (tmp.isFPR()) { if (Arg::isAnyDef(role)) defs.set(tmp.fpr()); if (Arg::isAnyUse(role)) uses.set(tmp.fpr()); } }); // We only care about values we define but not use. Otherwise we have to wait // for the value to be resolved anyway. defs.exclude(uses); defs.forEach([&] (Reg reg) { if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance) insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg)); }); } updateDistances(inst, localDistance, distanceToBlockEnd); } insertionSet.execute(block); } } } } } // namespace JSC::B3::Air #endif // ENABLE(B3_JIT)