source: webkit/trunk/Source/JavaScriptCore/jit/BinarySwitch.cpp

Last change on this file was 261755, checked in by Ross Kirsling, 5 years ago

[IWYU] Remove unnecessary includes from JSC implementation files
https://bugs.webkit.org/show_bug.cgi?id=211867

Reviewed by Keith Miller.

  • API/:
  • assembler/:
  • b3/:
  • bindings/:
  • builtins/BuiltinExecutables.cpp:
  • bytecode/:
  • bytecompiler/:
  • debugger/:
  • dfg/:
  • disassembler/:
  • ftl/:
  • heap/:
  • inspector/:
  • interpreter/:
  • jit/:
  • jsc.cpp:
  • llint/:
  • parser/:
  • profiler/:
  • runtime/:
  • testRegExp.cpp:
  • tools/:
  • wasm/:
  • yarr/:
File size: 17.4 KB
Line 
1/*
2 * Copyright (C) 2013-2019 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "BinarySwitch.h"
28
29#if ENABLE(JIT)
30
31#include <wtf/ListDump.h>
32
33namespace JSC {
34
35namespace BinarySwitchInternal {
36static constexpr bool verbose = false;
37}
38
39static unsigned globalCounter; // We use a different seed every time we are invoked.
40
41BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type)
42 : m_type(type)
43 , m_value(value)
44 , m_weakRandom(globalCounter++)
45 , m_index(0)
46 , m_caseIndex(UINT_MAX)
47{
48 if (cases.isEmpty())
49 return;
50
51 if (BinarySwitchInternal::verbose)
52 dataLog("Original cases: ", listDump(cases), "\n");
53
54 for (unsigned i = 0; i < cases.size(); ++i)
55 m_cases.append(Case(cases[i], i));
56
57 std::sort(m_cases.begin(), m_cases.end());
58
59 if (BinarySwitchInternal::verbose)
60 dataLog("Sorted cases: ", listDump(m_cases), "\n");
61
62#if ASSERT_ENABLED
63 for (unsigned i = 1; i < m_cases.size(); ++i)
64 ASSERT(m_cases[i - 1] < m_cases[i], i, m_cases.size(), m_cases[i].value, m_cases[i].index);
65#endif
66
67 build(0, false, m_cases.size());
68}
69
70BinarySwitch::~BinarySwitch()
71{
72}
73
74bool BinarySwitch::advance(MacroAssembler& jit)
75{
76 if (m_cases.isEmpty()) {
77 m_fallThrough.append(jit.jump());
78 return false;
79 }
80
81 if (m_index == m_branches.size()) {
82 RELEASE_ASSERT(m_jumpStack.isEmpty());
83 return false;
84 }
85
86 for (;;) {
87 const BranchCode& code = m_branches[m_index++];
88 switch (code.kind) {
89 case NotEqualToFallThrough:
90 switch (m_type) {
91 case Int32:
92 m_fallThrough.append(jit.branch32(
93 MacroAssembler::NotEqual, m_value,
94 MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
95 break;
96 case IntPtr:
97 m_fallThrough.append(jit.branchPtr(
98 MacroAssembler::NotEqual, m_value,
99 MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
100 break;
101 }
102 break;
103 case NotEqualToPush:
104 switch (m_type) {
105 case Int32:
106 m_jumpStack.append(jit.branch32(
107 MacroAssembler::NotEqual, m_value,
108 MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
109 break;
110 case IntPtr:
111 m_jumpStack.append(jit.branchPtr(
112 MacroAssembler::NotEqual, m_value,
113 MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
114 break;
115 }
116 break;
117 case LessThanToPush:
118 switch (m_type) {
119 case Int32:
120 m_jumpStack.append(jit.branch32(
121 MacroAssembler::LessThan, m_value,
122 MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
123 break;
124 case IntPtr:
125 m_jumpStack.append(jit.branchPtr(
126 MacroAssembler::LessThan, m_value,
127 MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
128 break;
129 }
130 break;
131 case Pop:
132 m_jumpStack.takeLast().link(&jit);
133 break;
134 case ExecuteCase:
135 m_caseIndex = code.index;
136 return true;
137 }
138 }
139}
140
141class RandomNumberGenerator {
142public:
143 using result_type = uint32_t;
144
145 RandomNumberGenerator(WeakRandom& weakRandom)
146 : m_weakRandom(weakRandom)
147 {
148 }
149
150 uint32_t operator()()
151 {
152 return m_weakRandom.getUint32();
153 }
154
155 static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
156 static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
157
158private:
159 WeakRandom& m_weakRandom;
160};
161
// Recursively appends the BranchCode program that binary-searches the sorted
// sub-range m_cases[start, end). hardStart = true means an enclosing
// less-than branch has already established that the switched value is at
// least the first case value of this sub-range; the leaf logic below uses
// that fact to elide the final equality check when the relevant case values
// form a gap-free consecutive run.
void BinarySwitch::build(unsigned start, bool hardStart, unsigned end)
{
    if (BinarySwitchInternal::verbose)
        dataLog("Building with start = ", start, ", hardStart = ", hardStart, ", end = ", end, "\n");

    auto append = [&] (const BranchCode& code) {
        if (BinarySwitchInternal::verbose)
            dataLog("==> ", code, "\n");
        m_branches.append(code);
    };

    unsigned size = end - start;

    RELEASE_ASSERT(size);

    // This code uses some random numbers to keep things balanced. It's important to keep in mind
    // that this does not improve average-case throughput under the assumption that all cases fire
    // with equal probability. It just ensures that there will not be some switch structure that
    // when combined with some input will always produce pathologically good or pathologically bad
    // performance.

    const unsigned leafThreshold = 3;

    if (size <= leafThreshold) {
        if (BinarySwitchInternal::verbose)
            dataLog("It's a leaf.\n");

        // It turns out that for exactly three cases or less, it's better to just compare each
        // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in
        // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches.
        //
        // This assumes that we care about the cost of hitting some case more than we care about
        // bottoming out in a default case. I believe that in most places where we use switch
        // statements, we are more likely to hit one of the cases than we are to fall through to
        // default. Intuitively, if we wanted to improve the performance of default, we would
        // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion.

        bool allConsecutive = false;

        // allConsecutive holds when the value reaching this leaf is known to lie
        // inside a run of consecutive case values: the left neighbor is adjacent
        // (or hardStart proves a lower bound), the right neighbor is adjacent, and
        // every value inside the leaf is adjacent to the next. In that situation
        // the last remaining case must match, so its equality check is skipped.
        if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1))
            && start + size < m_cases.size()
            && m_cases[start + size - 1].value == m_cases[start + size].value - 1) {
            allConsecutive = true;
            for (unsigned i = 0; i < size - 1; ++i) {
                if (m_cases[start + i].value + 1 != m_cases[start + i + 1].value) {
                    allConsecutive = false;
                    break;
                }
            }
        }

        if (BinarySwitchInternal::verbose)
            dataLog("allConsecutive = ", allConsecutive, "\n");

        Vector<unsigned, 3> localCaseIndices;
        for (unsigned i = 0; i < size; ++i)
            localCaseIndices.append(start + i);

        // Randomize the order in which the leaf's cases are tested; see the
        // balancing rationale above.
        std::shuffle(
            localCaseIndices.begin(), localCaseIndices.end(),
            RandomNumberGenerator(m_weakRandom));

        for (unsigned i = 0; i < size - 1; ++i) {
            append(BranchCode(NotEqualToPush, localCaseIndices[i]));
            append(BranchCode(ExecuteCase, localCaseIndices[i]));
            append(BranchCode(Pop));
        }

        if (!allConsecutive)
            append(BranchCode(NotEqualToFallThrough, localCaseIndices.last()));

        append(BranchCode(ExecuteCase, localCaseIndices.last()));
        return;
    }

    if (BinarySwitchInternal::verbose)
        dataLog("It's not a leaf.\n");

    // There are two different strategies we could consider here:
    //
    // Isolate median and split: pick a median and check if the comparison value is equal to it;
    // if so, execute the median case. Otherwise check if the value is less than the median, and
    // recurse left or right based on this. This has two subvariants: we could either first test
    // equality for the median and then do the less-than, or we could first do the less-than and
    // then check equality on the not-less-than path.
    //
    // Ignore median and split: do a less-than comparison on a value that splits the cases in two
    // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
    // against the median (or anything else); let the recursion handle those equality comparisons
    // once we bottom out in a list that has 3 cases or less (see above).
    //
    // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
    // would be faster since it leads to less branching for some lucky cases. It turns out that
    // Isolate is almost a total fail in the average, assuming all cases are equally likely. How
    // bad Isolate is depends on whether you believe that doing two consecutive branches based on
    // the same comparison is cheaper than doing the compare/branches separately. This is
    // difficult to evaluate. For small immediates that aren't blinded, we just care about
    // avoiding a second compare instruction. For large immediates or when blinding is in play, we
    // also care about the instructions used to materialize the immediate a second time. Isolate
    // can help with both costs since it involves first doing a < compare+branch on some value,
    // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
    // compare+branch on some value, and then the == compare+branch on that same value will happen
    // much later.
    //
    // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
    // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
    // of a compare+branch on some value that you've just done another compare+branch for. These
    // recurrence relations compute the total cost incurred if you executed the switch statement
    // on each matching value. So the average cost of hitting some case can be computed as
    // Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
    //
    // Isolate[1] = ComparisonCost
    // Isolate[2] = (2 + 1) * ComparisonCost
    // Isolate[3] = (3 + 2 + 1) * ComparisonCost
    // Isolate[n_] := With[
    //     {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
    //     ComparisonCost + ChainedComparisonCost +
    //     (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) +
    //     (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 1) * ComparisonCost
    // Ignore[3] = (3 + 2 + 1) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This does not account for the average cost of hitting the default case. See further below
    // for a discussion of that.
    //
    // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
    // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
    // switch statements that have 20 cases or fewer, though the margin of victory is never large
    // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
    // we see divergence between the two with Ignore winning. This is of course rather
    // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
    // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
    // divergence for large switches with Ignore winning, for example if a switch statement has
    // 100 cases then Ignore saves one branch on average.
    //
    // Our current JIT backends don't provide for optimization for chained comparisons, except for
    // reducing the code for materializing the immediate if the immediates are large or blinding
    // comes into play. Probably our JIT backends live somewhere north of
    // ChainedComparisonCost = 0.5.
    //
    // This implies that using the Ignore strategy is likely better. If we wanted to incorporate
    // the Isolate strategy, we'd want to determine the switch size threshold at which the two
    // cross over and then use Isolate for switches that are smaller than that size.
    //
    // The average cost of hitting the default case is similar, but involves a different cost for
    // the base cases: you have to assume that you will always fail each branch. For the Ignore
    // strategy we would get this recurrence relation; the same kind of thing happens to the
    // Isolate strategy:
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 2) * ComparisonCost
    // Ignore[3] = (3 + 3 + 3) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This means that if we cared about the default case more, we would likely reduce
    // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
    // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
    // increase the average cost of taking one of the non-default cases by 1/3. Typically the
    // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
    // that the default case is more important then we would want leafThreshold to be 2, and the
    // default case would become 1/6 faster on average. But we believe that most switch statements
    // are more likely to take one of the cases than the default, so we use leafThreshold = 3
    // and get a 1/6 speed-up on average for taking an explicit case.

    unsigned medianIndex = (start + end) / 2;

    if (BinarySwitchInternal::verbose)
        dataLog("medianIndex = ", medianIndex, "\n");

    // We want medianIndex to point to the thing we will do a less-than compare against. We want
    // this less-than compare to split the current sublist into equal-sized sublists, or
    // nearly-equal-sized with some randomness if we're in the odd case. With the above
    // calculation, in the odd case we will have medianIndex pointing at either the element we
    // want or the element to the left of the one we want. Consider the case of five elements:
    //
    //     0 1 2 3 4
    //
    // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
    // value < 2, then we will split the list into 2 elements on the left and three on the right.
    // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
    // that we don't become unbalanced on the right. This does not improve throughput since one
    // side will always get shafted, and that side might still be odd, in which case it will also
    // have two sides and one of them will get shafted - and so on. We just want to avoid
    // deterministic pathologies.
    //
    // In the even case, we will always end up pointing at the element we want:
    //
    //     0 1 2 3
    //
    // start will be 0, end will be 4. So, the average is 2, which is what we'd like.
    if (size & 1) {
        RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex);
        medianIndex += m_weakRandom.getUint32() & 1;
    } else
        RELEASE_ASSERT(medianIndex - start == end - medianIndex);

    RELEASE_ASSERT(medianIndex > start);
    RELEASE_ASSERT(medianIndex + 1 < end);

    if (BinarySwitchInternal::verbose)
        dataLog("fixed medianIndex = ", medianIndex, "\n");

    // Ignore strategy: branch on value < median, recurse into each half. The
    // right half gets hardStart = true because falling through the less-than
    // branch proves value >= m_cases[medianIndex].value.
    append(BranchCode(LessThanToPush, medianIndex));
    build(medianIndex, true, end);
    append(BranchCode(Pop));
    build(start, hardStart, medianIndex);
}
378
379void BinarySwitch::Case::dump(PrintStream& out) const
380{
381 out.print("<value: " , value, ", index: ", index, ">");
382}
383
384void BinarySwitch::BranchCode::dump(PrintStream& out) const
385{
386 switch (kind) {
387 case NotEqualToFallThrough:
388 out.print("NotEqualToFallThrough");
389 break;
390 case NotEqualToPush:
391 out.print("NotEqualToPush");
392 break;
393 case LessThanToPush:
394 out.print("LessThanToPush");
395 break;
396 case Pop:
397 out.print("Pop");
398 break;
399 case ExecuteCase:
400 out.print("ExecuteCase");
401 break;
402 }
403
404 if (index != UINT_MAX)
405 out.print("(", index, ")");
406}
407
408} // namespace JSC
409
410#endif // ENABLE(JIT)
411
Note: See TracBrowser for help on using the repository browser.