1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
|
/*
* Copyright (C) 2011, 2013-2015 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "DFGOSRExitCompiler.h"
#if ENABLE(DFG_JIT) && USE(JSVALUE64)
#include "DFGOperations.h"
#include "DFGOSRExitCompilerCommon.h"
#include "DFGSpeculativeJIT.h"
#include "JSCInlines.h"
#include "VirtualRegister.h"
#include <wtf/DataLog.h>
namespace JSC { namespace DFG {
// Emits the code for one DFG OSR exit (JSVALUE64 build).
//
// Parameters:
//   exit     - the exit being compiled; supplies the exit kind, code origins, the optional
//              JS value source and the optional value profile.
//   operands - one ValueRecovery per operand, saying where the operand's value is right now
//              (GPR, FPR, stack slot, constant, ...) and in which representation.
//   recovery - optional speculation recovery to undo first; may be null.
//
// The emitted code performs, in this order:
//   1. optional debug printing of the speculation failure,
//   2. speculation recovery,
//   3. array profile / value profile refinement,
//   4. a dump of all live operand state (GPRs, FPRs, stack) into a scratch buffer,
//   5. stack pointer adjustment, callee-save restore/save, tag register materialization,
//   6. format conversion of every operand from the scratch buffer into its stack slot,
//   7. arguments recovery, exit count handling, inlined call frame reification and the
//      final jump to the target.
void OSRExitCompiler::compileExit(const OSRExit& exit, const Operands<ValueRecovery>& operands, SpeculationRecovery* recovery)
{
    m_jit.jitAssertTagsInPlace();

    // Pro-forma stuff.
    if (Options::printEachOSRExit()) {
        // The debug info is handed to the emitted debug call and is not freed here.
        // NOTE(review): presumably it has to outlive the generated exit code - confirm.
        SpeculationFailureDebugInfo* debugInfo = new SpeculationFailureDebugInfo;
        debugInfo->codeBlock = m_jit.codeBlock();
        debugInfo->kind = exit.m_kind;
        debugInfo->bytecodeOffset = exit.m_codeOrigin.bytecodeIndex;

        m_jit.debugCall(debugOperationPrintSpeculationFailure, debugInfo);
    }

    // Perform speculation recovery. This only comes into play when an operation
    // starts mutating state before verifying the speculation it has already made.
    if (recovery) {
        switch (recovery->type()) {
        case SpeculativeAdd:
            // Undo the add and re-tag the destination with the number tag.
            m_jit.sub32(recovery->src(), recovery->dest());
            m_jit.or64(GPRInfo::tagTypeNumberRegister, recovery->dest());
            break;

        case BooleanSpeculationCheck:
            m_jit.xor64(AssemblyHelpers::TrustedImm32(static_cast<int32_t>(ValueFalse)), recovery->dest());
            break;

        default:
            break;
        }
    }

    // Refine some array and/or value profile, if appropriate.
    if (!!exit.m_jsValueSource) {
        if (exit.m_kind == BadCache || exit.m_kind == BadIndexingType) {
            // If the instruction that this originated from has an array profile, then
            // refine it. If it doesn't, then do nothing. The latter could happen for
            // hoisted checks, or checks emitted for operations that didn't have array
            // profiling - either ops that aren't array accesses at all, or weren't
            // known to be array accesses in the bytecode. The latter case is a FIXME
            // while the former case is an outcome of a CheckStructure not knowing why
            // it was emitted (could be either due to an inline cache of a
            // property access, or due to an array profile).
            CodeOrigin codeOrigin = exit.m_codeOriginForExitProfile;
            if (ArrayProfile* arrayProfile = m_jit.baselineCodeBlockFor(codeOrigin)->getArrayProfile(codeOrigin.bytecodeIndex)) {
                // The register the value source depends on must not be picked as a scratch.
                GPRReg usedRegister;
                if (exit.m_jsValueSource.isAddress())
                    usedRegister = exit.m_jsValueSource.base();
                else
                    usedRegister = exit.m_jsValueSource.gpr();

                GPRReg scratch1;
                GPRReg scratch2;
                scratch1 = AssemblyHelpers::selectScratchGPR(usedRegister);
                scratch2 = AssemblyHelpers::selectScratchGPR(usedRegister, scratch1);

                // Preserve the scratch registers; they are restored in reverse order below.
                if (isARM64()) {
                    m_jit.pushToSave(scratch1);
                    m_jit.pushToSave(scratch2);
                } else {
                    m_jit.push(scratch1);
                    m_jit.push(scratch2);
                }

                GPRReg value;
                if (exit.m_jsValueSource.isAddress()) {
                    value = scratch1;
                    m_jit.loadPtr(AssemblyHelpers::Address(exit.m_jsValueSource.asAddress()), value);
                } else
                    value = exit.m_jsValueSource.gpr();

                // Record the last seen structure ID. This goes through scratch2 rather than
                // scratch1: when the value source is an address, 'value' aliases scratch1 and
                // must still hold the cell pointer for the indexing type load that follows.
                m_jit.load32(AssemblyHelpers::Address(value, JSCell::structureIDOffset()), scratch2);
                m_jit.store32(scratch2, arrayProfile->addressOfLastSeenStructureID());

                // Set bit (1 << indexingType) in the profile's array modes. After this load
                // 'value' is no longer needed, so overwriting scratch1 is fine.
                m_jit.load8(AssemblyHelpers::Address(value, JSCell::indexingTypeOffset()), scratch1);
                m_jit.move(AssemblyHelpers::TrustedImm32(1), scratch2);
                m_jit.lshift32(scratch1, scratch2);
                m_jit.or32(scratch2, AssemblyHelpers::AbsoluteAddress(arrayProfile->addressOfArrayModes()));

                if (isARM64()) {
                    m_jit.popToRestore(scratch2);
                    m_jit.popToRestore(scratch1);
                } else {
                    m_jit.pop(scratch2);
                    m_jit.pop(scratch1);
                }
            }
        }

        if (!!exit.m_valueProfile) {
            EncodedJSValue* bucket = exit.m_valueProfile.getSpecFailBucket(0);

            if (exit.m_jsValueSource.isAddress()) {
                // We can't be sure that we have a spare register. So use the tagTypeNumberRegister,
                // since we know how to restore it.
                m_jit.load64(AssemblyHelpers::Address(exit.m_jsValueSource.asAddress()), GPRInfo::tagTypeNumberRegister);
                m_jit.store64(GPRInfo::tagTypeNumberRegister, bucket);
                m_jit.move(AssemblyHelpers::TrustedImm64(TagTypeNumber), GPRInfo::tagTypeNumberRegister);
            } else
                m_jit.store64(exit.m_jsValueSource.gpr(), bucket);
        }
    }

    // What follows is an intentionally simple OSR exit implementation that generates
    // fairly poor code but is very easy to hack. In particular, it dumps all state that
    // needs conversion into a scratch buffer so that in step 6, where we actually do the
    // conversions, we know that all temp registers are free to use and the variable is
    // definitely in a well-known spot in the scratch buffer regardless of whether it had
    // originally been in a register or spilled. This allows us to decouple "where was
    // the variable" from "how was it represented". Consider that the
    // Int32DisplacedInJSStack recovery: it tells us that the value is in a
    // particular place and that that place holds an unboxed int32. We have two different
    // places that a value could be (displaced, register) and a bunch of different
    // ways of representing a value. The number of recoveries is two * a bunch. The code
    // below means that we have to have two + a bunch cases rather than two * a bunch.
    // Once we have loaded the value from wherever it was, the reboxing is the same
    // regardless of its location. Likewise, before we do the reboxing, the way we get to
    // the value (i.e. where we load it from) is the same regardless of its type. Because
    // the code below always dumps everything into a scratch buffer first, the two
    // questions become orthogonal, which simplifies adding new types and adding new
    // locations.
    //
    // This raises the question: does using such a suboptimal implementation of OSR exit,
    // where we always emit code to dump all state into a scratch buffer only to then
    // dump it right back into the stack, hurt us in any way? The answer is that OSR exits
    // are rare. Our tiering strategy ensures this. This is because if an OSR exit is
    // taken more than ~100 times, we jettison the DFG code block along with all of its
    // exits. It is impossible for an OSR exit - i.e. the code we compile below - to
    // execute frequently enough for the codegen to matter that much. It probably matters
    // enough that we don't want to turn this into some super-slow function call, but so
    // long as we're generating straight-line code, that code can be pretty bad. Also
    // because we tend to exit only along one OSR exit from any DFG code block - that's an
    // empirical result that we're extremely confident about - the code size of this
    // doesn't matter much. Hence any attempt to optimize the codegen here is just purely
    // harmful to the system: it probably won't reduce either net memory usage or net
    // execution time. It will only prevent us from cleanly decoupling "where was the
    // variable" from "how was it represented", which will make it more difficult to add
    // features in the future and it will make it harder to reason about bugs.

    // Save all state from GPRs into the scratch buffer.

    // One EncodedJSValue-sized slot per operand; slot 'index' belongs to operands[index].
    ScratchBuffer* scratchBuffer = m_jit.vm()->scratchBufferForSize(sizeof(EncodedJSValue) * operands.size());
    EncodedJSValue* scratch = scratchBuffer ? static_cast<EncodedJSValue*>(scratchBuffer->dataBuffer()) : nullptr;

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case InGPR:
        case UnboxedInt32InGPR:
        case UnboxedInt52InGPR:
        case UnboxedStrictInt52InGPR:
        case UnboxedCellInGPR:
            m_jit.store64(recovery.gpr(), scratch + index);
            break;

        default:
            break;
        }
    }

    // And voila, all GPRs are free to reuse.

    // Save all state from FPRs into the scratch buffer.

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case UnboxedDoubleInFPR:
        case InFPR:
            // The slot address is materialized in regT0 and the double stored through it.
            m_jit.move(AssemblyHelpers::TrustedImmPtr(scratch + index), GPRInfo::regT0);
            m_jit.storeDouble(recovery.fpr(), MacroAssembler::Address(GPRInfo::regT0));
            break;

        default:
            break;
        }
    }

    // Now, all FPRs are also free.

    // Save all state from the stack into the scratch buffer. For simplicity we
    // do this even for state that's already in the right place on the stack.
    // It makes things simpler later.

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case DisplacedInJSStack:
        case CellDisplacedInJSStack:
        case BooleanDisplacedInJSStack:
        case Int32DisplacedInJSStack:
        case DoubleDisplacedInJSStack:
        case Int52DisplacedInJSStack:
        case StrictInt52DisplacedInJSStack:
            m_jit.load64(AssemblyHelpers::addressFor(recovery.virtualRegister()), GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, scratch + index);
            break;

        default:
            break;
        }
    }

    // Need to ensure that the stack pointer accounts for the worst-case stack usage at exit. This
    // could toast some stack that the DFG used. We need to do it before storing to stack offsets
    // used by baseline.
    m_jit.addPtr(
        CCallHelpers::TrustedImm32(
            -m_jit.codeBlock()->jitCode()->dfgCommon()->requiredRegisterCountForExit * sizeof(Register)),
        CCallHelpers::framePointerRegister, CCallHelpers::stackPointerRegister);

    // Restore the DFG callee saves and then save the ones the baseline JIT uses.
    m_jit.emitRestoreCalleeSaves();
    m_jit.emitSaveCalleeSavesFor(m_jit.baselineCodeBlock());

    // The tag registers are needed to materialize recoveries below.
    m_jit.emitMaterializeTagCheckRegisters();

    if (exit.isExceptionHandler())
        m_jit.copyCalleeSavesToVMCalleeSavesBuffer();

    // Do all data format conversions and store the results into the stack.

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];
        VirtualRegister reg = operands.virtualRegisterForIndex(index);

        // Skip locals that fall inside the baseline code block's callee save space.
        if (reg.isLocal() && reg.toLocal() < static_cast<int>(m_jit.baselineCodeBlock()->calleeSaveSpaceAsVirtualRegisters()))
            continue;

        int operand = reg.offset();

        switch (recovery.technique()) {
        case InGPR:
        case UnboxedCellInGPR:
        case DisplacedInJSStack:
        case CellDisplacedInJSStack:
        case BooleanDisplacedInJSStack:
        case InFPR:
            // Copied from the scratch slot to the stack slot without conversion.
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;

        case UnboxedInt32InGPR:
        case Int32DisplacedInJSStack:
            // Box the int32: clear the upper 32 bits, then apply the number tag.
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.zeroExtend32ToPtr(GPRInfo::regT0, GPRInfo::regT0);
            m_jit.or64(GPRInfo::tagTypeNumberRegister, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;

        case UnboxedInt52InGPR:
        case Int52DisplacedInJSStack:
            // Shift right by int52ShiftAmount first, then box like the strict int52 case.
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.rshift64(
                AssemblyHelpers::TrustedImm32(JSValue::int52ShiftAmount), GPRInfo::regT0);
            m_jit.boxInt52(GPRInfo::regT0, GPRInfo::regT0, GPRInfo::regT1, FPRInfo::fpRegT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;

        case UnboxedStrictInt52InGPR:
        case StrictInt52DisplacedInJSStack:
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.boxInt52(GPRInfo::regT0, GPRInfo::regT0, GPRInfo::regT1, FPRInfo::fpRegT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;

        case UnboxedDoubleInFPR:
        case DoubleDisplacedInJSStack:
            // Reload the raw double, purify NaN, box it and store the boxed value.
            m_jit.move(AssemblyHelpers::TrustedImmPtr(scratch + index), GPRInfo::regT0);
            m_jit.loadDouble(MacroAssembler::Address(GPRInfo::regT0), FPRInfo::fpRegT0);
            m_jit.purifyNaN(FPRInfo::fpRegT0);
            m_jit.boxDouble(FPRInfo::fpRegT0, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;

        case Constant:
            m_jit.store64(
                AssemblyHelpers::TrustedImm64(JSValue::encode(recovery.constant())),
                AssemblyHelpers::addressFor(operand));
            break;

        case DirectArgumentsThatWereNotCreated:
        case ClonedArgumentsThatWereNotCreated:
            // Don't do this, yet.
            break;

        default:
            RELEASE_ASSERT_NOT_REACHED();
            break;
        }
    }

    // Now that things on the stack are recovered, do the arguments recovery. We assume that arguments
    // recoveries don't recursively refer to each other. But, we don't try to assume that they only
    // refer to certain ranges of locals. Hence why we need to do this here, once the stack is sensible.
    // Note that we also roughly assume that the arguments might still be materialized outside of its
    // inline call frame scope - but for now the DFG wouldn't do that.

    emitRestoreArguments(operands);

    // Adjust the old JIT's execute counter. Since we are exiting OSR, we know
    // that all new calls into this code will go to the new JIT, so the execute
    // counter only affects call frames that performed OSR exit and call frames
    // that were still executing the old JIT at the time of another call frame's
    // OSR exit. We want to ensure that the following is true:
    //
    // (a) Code that performs an OSR exit gets a chance to reenter optimized
    //     code eventually, since optimized code is faster. But we don't
    //     want to do such reentry too aggressively (see (c) below).
    //
    // (b) If there is code on the call stack that is still running the old
    //     JIT's code and has never OSR'd, then it should get a chance to
    //     perform OSR entry despite the fact that we've exited.
    //
    // (c) Code that performs an OSR exit should not immediately retry OSR
    //     entry, since both forms of OSR are expensive. OSR entry is
    //     particularly expensive.
    //
    // (d) Frequent OSR failures, even those that do not result in the code
    //     running in a hot loop, result in recompilation getting triggered.
    //
    // To ensure (c), we'd like to set the execute counter to
    // counterValueForOptimizeAfterWarmUp(). This seems like it would endanger
    // (a) and (b), since then every OSR exit would delay the opportunity for
    // every call frame to perform OSR entry. Essentially, if OSR exit happens
    // frequently and the function has few loops, then the counter will never
    // become non-negative and OSR entry will never be triggered. OSR entry
    // will only happen if a loop gets hot in the old JIT, which does a pretty
    // good job of ensuring (a) and (b). But that doesn't take care of (d),
    // since each speculation failure would reset the execute counter.
    // So we check here if the number of speculation failures is significantly
    // larger than the number of successes (we want 90% success rate), and if
    // there have been a large enough number of failures. If so, we set the
    // counter to 0; otherwise we set the counter to
    // counterValueForOptimizeAfterWarmUp().

    handleExitCounts(m_jit, exit);

    // Reify inlined call frames.

    reifyInlinedCallFrames(m_jit, exit);

    // And finish.
    adjustAndJumpToTarget(m_jit, exit);
}
} } // namespace JSC::DFG
#endif // ENABLE(DFG_JIT) && USE(JSVALUE64)
|