Diffstat (limited to 'Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp')
 Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp | 239 ++++++++++++++++++++
 1 file changed, 239 insertions(+), 0 deletions(-)
diff --git a/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp
new file mode 100644
index 000000000..b3d5d0b71
--- /dev/null
+++ b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "AirFixPartialRegisterStalls.h"
+
+#if ENABLE(B3_JIT)
+
+#include "AirBasicBlock.h"
+#include "AirCode.h"
+#include "AirInsertionSet.h"
+#include "AirInst.h"
+#include "AirInstInlines.h"
+#include "AirPhaseScope.h"
+#include "MacroAssembler.h"
+#include <wtf/IndexMap.h>
+#include <wtf/IndexSet.h>
+#include <wtf/Vector.h>
+
+namespace JSC { namespace B3 { namespace Air {
+
+namespace {
+
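+// On x86, several scalar SSE instructions only write the low part of their destination XMM
+// register and leave the upper bits untouched. For example (illustrative, register choice is
+// arbitrary):
+//
+//     cvtsi2sd %eax, %xmm0
+//
+// writes only the low 64 bits of %xmm0, so the out-of-order core has to treat the old value of
+// %xmm0 as an input. That false dependency can stall the pipeline. The opcodes below are the
+// Air instructions whose x86 lowering is expected to perform such a partial update.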
+bool hasPartialXmmRegUpdate(const Inst& inst)
+{
+    switch (inst.kind.opcode) {
+    case ConvertDoubleToFloat:
+    case ConvertFloatToDouble:
+    case ConvertInt32ToDouble:
+    case ConvertInt64ToDouble:
+    case ConvertInt32ToFloat:
+    case ConvertInt64ToFloat:
+    case SqrtDouble:
+    case SqrtFloat:
+    case CeilDouble:
+    case CeilFloat:
+    case FloorDouble:
+    case FloorFloat:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
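+// Modern x86 cores recognize "xorps reg, reg" (and the other self-xor forms) as a zeroing
+// idiom that has no dependency on the register's previous value. Inserting one right before a
+// partial update therefore breaks the false dependency described above.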
+bool isDependencyBreaking(const Inst& inst)
+{
+    // "xorps reg, reg" is used by the frontend to remove the dependency on its argument.
+    return inst.kind.opcode == MoveZeroToDouble;
+}
+
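+// A partial update only costs something if the previous definition of the register is still in
+// flight. We treat a definition as "recent enough to matter" when it is within
+// minimumSafeDistance instructions of the partial update.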
+// FIXME: find a good distance per architecture experimentally.
+// LLVM uses a distance of 16 but that comes from Nehalem.
+unsigned char minimumSafeDistance = 16;
+
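+// Tracks, for each FP register, how close its most recent definition is, measured in
+// instructions. 255 is the "no definition close enough" sentinel: it is well above
+// minimumSafeDistance, and distances are only propagated across blocks while they are below
+// that threshold, so the sentinel never spreads from one block to another.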
+struct FPDefDistance {
+    FPDefDistance()
+    {
+        for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i)
+            distance[i] = 255;
+    }
+
+    void reset(FPRReg reg)
+    {
+        unsigned index = MacroAssembler::fpRegisterIndex(reg);
+        distance[index] = 255;
+    }
+
+    void add(FPRReg reg, unsigned registerDistance)
+    {
+        unsigned index = MacroAssembler::fpRegisterIndex(reg);
+        if (registerDistance < distance[index])
+            distance[index] = static_cast<unsigned char>(registerDistance);
+    }
+
+    bool updateFromPredecessor(FPDefDistance& predecessorDistance, unsigned constantOffset = 0)
+    {
+        bool changed = false;
+        for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) {
+            unsigned regDistance = predecessorDistance.distance[i] + constantOffset;
+            if (regDistance < minimumSafeDistance && regDistance < distance[i]) {
+                distance[i] = regDistance;
+                changed = true;
+            }
+        }
+        return changed;
+    }
+
+    unsigned char distance[MacroAssembler::numberOfFPRegisters()];
+};
+
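+// Processes one instruction: a dependency-breaking instruction clears the tracked distance for
+// its destination, while any other FP definition records how far the instruction is from the
+// end of its block.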
+void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd)
+{
+    --distanceToBlockEnd;
+
+    if (isDependencyBreaking(inst)) {
+        localDistance.reset(inst.args[0].tmp().fpr());
+        return;
+    }
+
+    inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) {
+        ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation.");
+
+        if (tmp.isFPR() && Arg::isAnyDef(role))
+            localDistance.add(tmp.fpr(), distanceToBlockEnd);
+    });
+}
+
+} // anonymous namespace
+
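+// The phase works in three steps:
+// 1) Summarize each block: find the distance from each FP register's last definition to the
+//    block end, and seed the successors' entry distances with it.
+// 2) Propagate those entry distances across the CFG until a fixed point is reached; distances
+//    at or above minimumSafeDistance are dropped since they cannot cause a stall.
+// 3) Rescan the blocks that contain partial updates and insert a dependency-breaking
+//    MoveZeroToDouble where the destination register was defined too recently.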
+void fixPartialRegisterStalls(Code& code)
+{
+    if (!isX86())
+        return;
+
+    PhaseScope phaseScope(code, "fixPartialRegisterStalls");
+
+    Vector<BasicBlock*> candidates;
+
+    for (BasicBlock* block : code) {
+        for (const Inst& inst : *block) {
+            if (hasPartialXmmRegUpdate(inst)) {
+                candidates.append(block);
+                break;
+            }
+        }
+    }
+
+    // Fortunately, instructions that cause partial register stalls are rare. Return early
+    // if no block contains any.
+    if (candidates.isEmpty())
+        return;
+
+    // For each block, this tracks the distance to the last instruction that set each FP
+    // register, as seen at block *entry*.
+    IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size());
+
+    // Blocks whose entry distances have changed and still need to be propagated.
+    IndexSet<BasicBlock> dirty;
+
+    // First, we compute the local distance for each block and push it to the successors.
+    for (BasicBlock* block : code) {
+        FPDefDistance localDistance;
+
+        unsigned distanceToBlockEnd = block->size();
+        for (Inst& inst : *block)
+            updateDistances(inst, localDistance, distanceToBlockEnd);
+
+        for (BasicBlock* successor : block->successorBlocks()) {
+            if (lastDefDistance[successor].updateFromPredecessor(localDistance))
+                dirty.add(successor);
+        }
+    }
+
+    // Now propagate the minimum distances across blocks until we reach a fixed point.
+    bool changed;
+    do {
+        changed = false;
+
+        for (BasicBlock* block : code) {
+            if (!dirty.remove(block))
+                continue;
+
+            // Shortcut: if the block has at least minimumSafeDistance instructions, anything
+            // defined before its entry is already far enough away at its successors, so
+            // propagating through it cannot add any information.
+            if (block->size() >= minimumSafeDistance)
+                continue;
+
+            unsigned blockSize = block->size();
+            FPDefDistance& blockDistance = lastDefDistance[block];
+            for (BasicBlock* successor : block->successorBlocks()) {
+                if (lastDefDistance[successor].updateFromPredecessor(blockDistance, blockSize)) {
+                    dirty.add(successor);
+                    changed = true;
+                }
+            }
+        }
+    } while (changed);
+
+    // Finally, update each block as needed.
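+    // Starting from the distances known at block entry, walk the instructions in order and
+    // insert a MoveZeroToDouble right before any partial update whose destination register was
+    // defined too recently and is not also read by the instruction itself.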
+    InsertionSet insertionSet(code);
+    for (BasicBlock* block : candidates) {
+        unsigned distanceToBlockEnd = block->size();
+        FPDefDistance& localDistance = lastDefDistance[block];
+
+        for (unsigned i = 0; i < block->size(); ++i) {
+            Inst& inst = block->at(i);
+
+            if (hasPartialXmmRegUpdate(inst)) {
+                RegisterSet defs;
+                RegisterSet uses;
+                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) {
+                    if (tmp.isFPR()) {
+                        if (Arg::isAnyDef(role))
+                            defs.set(tmp.fpr());
+                        if (Arg::isAnyUse(role))
+                            uses.set(tmp.fpr());
+                    }
+                });
+                // We only care about registers this instruction defines but does not use. If it
+                // also reads the register, it has to wait for the previous value anyway, so
+                // breaking the dependency would not help.
+                defs.exclude(uses);
+
+                defs.forEach([&] (Reg reg) {
+                    if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
+                        insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
+                });
+            }
+
+            updateDistances(inst, localDistance, distanceToBlockEnd);
+        }
+        insertionSet.execute(block);
+    }
+}
+
+} } } // namespace JSC::B3::Air
+
+#endif // ENABLE(B3_JIT)