/*
 * Copyright (C) 2013-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "BinarySwitch.h"

#if ENABLE(JIT)

#include <wtf/ListDump.h>

namespace JSC {

namespace BinarySwitchInternal {
static constexpr bool verbose = false;
}

static unsigned globalCounter; // We use a different seed every time we are invoked.

BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type)
    : m_type(type)
    , m_value(value)
    , m_weakRandom(globalCounter++)
    , m_index(0)
    , m_caseIndex(UINT_MAX)
{
    if (cases.isEmpty())
        return;

    if (BinarySwitchInternal::verbose)
        dataLog("Original cases: ", listDump(cases), "\n");

    for (unsigned i = 0; i < cases.size(); ++i)
        m_cases.append(Case(cases[i], i));

    std::sort(m_cases.begin(), m_cases.end());

    if (BinarySwitchInternal::verbose)
        dataLog("Sorted cases: ", listDump(m_cases), "\n");

#if ASSERT_ENABLED
    for (unsigned i = 1; i < m_cases.size(); ++i)
        ASSERT(m_cases[i - 1] < m_cases[i], i, m_cases.size(), m_cases[i].value, m_cases[i].index);
#endif

    build(0, false, m_cases.size());
}

BinarySwitch::~BinarySwitch()
{
}

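// Callers drive code generation by calling advance() in a loop: each time it returns true,
// the caller emits the body for the currently selected case; once it returns false, the
// switch is fully emitted and m_fallThrough holds the jumps that should be linked to the
// default (fall-through) target.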
bool BinarySwitch::advance(MacroAssembler& jit)
{
    if (m_cases.isEmpty()) {
        m_fallThrough.append(jit.jump());
        return false;
    }

    if (m_index == m_branches.size()) {
        RELEASE_ASSERT(m_jumpStack.isEmpty());
        return false;
    }

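    // Interpret the branch codes pre-computed by build(). The comparison codes emit a branch
    // and keep going; Pop links the most recently pushed jump here; ExecuteCase hands control
    // back to the caller so it can emit the body of that case.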
    for (;;) {
        const BranchCode& code = m_branches[m_index++];
        switch (code.kind) {
        case NotEqualToFallThrough:
            switch (m_type) {
            case Int32:
                m_fallThrough.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_fallThrough.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case NotEqualToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case LessThanToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case Pop:
            m_jumpStack.takeLast().link(&jit);
            break;
        case ExecuteCase:
            m_caseIndex = code.index;
            return true;
        }
    }
}

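// Adapts WeakRandom to the uniform random bit generator interface that std::shuffle expects.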
class RandomNumberGenerator {
public:
    using result_type = uint32_t;

    RandomNumberGenerator(WeakRandom& weakRandom)
        : m_weakRandom(weakRandom)
    {
    }

    uint32_t operator()()
    {
        return m_weakRandom.getUint32();
    }

    static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
    static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }

private:
    WeakRandom& m_weakRandom;
};

void BinarySwitch::build(unsigned start, bool hardStart, unsigned end)
{
    if (BinarySwitchInternal::verbose)
        dataLog("Building with start = ", start, ", hardStart = ", hardStart, ", end = ", end, "\n");

    auto append = [&] (const BranchCode& code) {
        if (BinarySwitchInternal::verbose)
            dataLog("==> ", code, "\n");
        m_branches.append(code);
    };

    unsigned size = end - start;

    RELEASE_ASSERT(size);

    // This code uses some random numbers to keep things balanced. It's important to keep in mind
    // that this does not improve average-case throughput under the assumption that all cases fire
    // with equal probability. It just ensures that there will not be some switch structure that
    // when combined with some input will always produce pathologically good or pathologically bad
    // performance.

    const unsigned leafThreshold = 3;

    if (size <= leafThreshold) {
        if (BinarySwitchInternal::verbose)
            dataLog("It's a leaf.\n");

        // It turns out that for exactly three cases or less, it's better to just compare each
        // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in
        // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches.
        //
        // This assumes that we care about the cost of hitting some case more than we care about
        // bottoming out in a default case. I believe that in most places where we use switch
        // statements, we are more likely to hit one of the cases than we are to fall through to
        // default. Intuitively, if we wanted to improve the performance of default, we would
        // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion.

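        // Detect whether, by the time we reach this leaf, the value is already pinned to one of
        // the leaf's cases: the leaf's values form a consecutive run, the range below is closed
        // off (a hard start or an adjacent smaller case), and the case just above the leaf is
        // adjacent as well. When that holds, a value that fails the equality checks against all
        // but one of the leaf's cases can only be the remaining case, so the final
        // NotEqualToFallThrough test is skipped below.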
        bool allConsecutive = false;

        if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1))
            && start + size < m_cases.size()
            && m_cases[start + size - 1].value == m_cases[start + size].value - 1) {
            allConsecutive = true;
            for (unsigned i = 0; i < size - 1; ++i) {
                if (m_cases[start + i].value + 1 != m_cases[start + i + 1].value) {
                    allConsecutive = false;
                    break;
                }
            }
        }

        if (BinarySwitchInternal::verbose)
            dataLog("allConsecutive = ", allConsecutive, "\n");

        Vector<unsigned, 3> localCaseIndices;
        for (unsigned i = 0; i < size; ++i)
            localCaseIndices.append(start + i);

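        // Randomize the order in which the leaf's cases are compared, so that no particular
        // input value is deterministically the slowest one to match.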
        std::shuffle(
            localCaseIndices.begin(), localCaseIndices.end(),
            RandomNumberGenerator(m_weakRandom));

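        // For all but the last (shuffled) case: branch away on inequality, emit the case body
        // on the equal path, then pop/link the inequality branch so the next comparison follows.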
        for (unsigned i = 0; i < size - 1; ++i) {
            append(BranchCode(NotEqualToPush, localCaseIndices[i]));
            append(BranchCode(ExecuteCase, localCaseIndices[i]));
            append(BranchCode(Pop));
        }

        if (!allConsecutive)
            append(BranchCode(NotEqualToFallThrough, localCaseIndices.last()));

        append(BranchCode(ExecuteCase, localCaseIndices.last()));
        return;
    }

    if (BinarySwitchInternal::verbose)
        dataLog("It's not a leaf.\n");

    // There are two different strategies we could consider here:
    //
    // Isolate median and split: pick a median and check if the comparison value is equal to it;
    // if so, execute the median case. Otherwise check if the value is less than the median, and
    // recurse left or right based on this. This has two subvariants: we could either first test
    // equality for the median and then do the less-than, or we could first do the less-than and
    // then check equality on the not-less-than path.
    //
    // Ignore median and split: do a less-than comparison on a value that splits the cases in two
    // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
    // against the median (or anything else); let the recursion handle those equality comparisons
    // once we bottom out in a list that has 3 cases or fewer (see above).
    //
    // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
    // would be faster since it leads to less branching for some lucky cases. It turns out that
    // Isolate is almost a total fail in the average, assuming all cases are equally likely. How
    // bad Isolate is depends on whether you believe that doing two consecutive branches based on
    // the same comparison is cheaper than doing the compare/branches separately. This is
    // difficult to evaluate. For small immediates that aren't blinded, we just care about
    // avoiding a second compare instruction. For large immediates or when blinding is in play, we
    // also care about the instructions used to materialize the immediate a second time. Isolate
    // can help with both costs since it involves first doing a < compare+branch on some value,
    // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
    // compare+branch on some value, and then the == compare+branch on that same value will happen
    // much later.
    //
    // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
    // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
    // of a compare+branch on some value that you've just done another compare+branch for. These
    // recurrence relations compute the total cost incurred if you executed the switch statement
    // on each matching value. So the average cost of hitting some case can be computed as
    // Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
    //
    // Isolate[1] = ComparisonCost
    // Isolate[2] = (2 + 1) * ComparisonCost
    // Isolate[3] = (3 + 2 + 1) * ComparisonCost
    // Isolate[n_] := With[
    //     {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
    //     ComparisonCost + ChainedComparisonCost +
    //     (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) +
    //     (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 1) * ComparisonCost
    // Ignore[3] = (3 + 2 + 1) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This does not account for the average cost of hitting the default case. See further below
    // for a discussion of that.
    //
    // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
    // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
    // switch statements that have 20 cases or fewer, though the margin of victory is never large
    // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
    // we see divergence between the two with Ignore winning. This is of course rather
    // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
    // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
    // divergence for large switches with Ignore winning, for example if a switch statement has
    // 100 cases then Ignore saves one branch on average.
    //
    // Our current JIT backends don't provide for optimization for chained comparisons, except for
    // reducing the code for materializing the immediate if the immediates are large or blinding
    // comes into play. Probably our JIT backends live somewhere north of
    // ChainedComparisonCost = 0.5.
    //
    // This implies that using the Ignore strategy is likely better. If we wanted to incorporate
    // the Isolate strategy, we'd want to determine the switch size threshold at which the two
    // cross over and then use Isolate for switches that are smaller than that size.
    //
    // The average cost of hitting the default case is similar, but involves a different cost for
    // the base cases: you have to assume that you will always fail each branch. For the Ignore
    // strategy we would get this recurrence relation; the same kind of thing happens to the
    // Isolate strategy:
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 2) * ComparisonCost
    // Ignore[3] = (3 + 3 + 3) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This means that if we cared about the default case more, we would likely reduce
    // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
    // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
    // increase the average cost of taking one of the non-default cases by 1/3. Typically the
    // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
    // that the default case is more important then we would want leafThreshold to be 2, and the
    // default case would become 1/6 faster on average. But we believe that most switch statements
    // are more likely to take one of the cases than the default, so we use leafThreshold = 3
    // and get a 1/6 speed-up on average for taking an explicit case.

    unsigned medianIndex = (start + end) / 2;

    if (BinarySwitchInternal::verbose)
        dataLog("medianIndex = ", medianIndex, "\n");

    // We want medianIndex to point to the thing we will do a less-than compare against. We want
    // this less-than compare to split the current sublist into equal-sized sublists, or
    // nearly-equal-sized with some randomness if we're in the odd case. With the above
    // calculation, in the odd case we will have medianIndex pointing at either the element we
    // want or the element to the left of the one we want. Consider the case of five elements:
    //
    //     0 1 2 3 4
    //
    // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
    // value < 2, then we will split the list into 2 elements on the left and three on the right.
    // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
    // that we don't become unbalanced on the right. This does not improve throughput since one
    // side will always get shafted, and that side might still be odd, in which case it will also
    // have two sides and one of them will get shafted - and so on. We just want to avoid
    // deterministic pathologies.
    //
    // In the even case, we will always end up pointing at the element we want:
    //
    //     0 1 2 3
    //
    // start will be 0, end will be 4. So, the average is 2, which is what we'd like.
    if (size & 1) {
        RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex);
        medianIndex += m_weakRandom.getUint32() & 1;
    } else
        RELEASE_ASSERT(medianIndex - start == end - medianIndex);

    RELEASE_ASSERT(medianIndex > start);
    RELEASE_ASSERT(medianIndex + 1 < end);

    if (BinarySwitchInternal::verbose)
        dataLog("fixed medianIndex = ", medianIndex, "\n");

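    // Emit the less-than test against the median. The taken (less-than) branch is pushed onto
    // the jump stack; the not-less-than half [medianIndex, end) is built inline on the
    // fall-through path with a hard lower bound, then the pushed branch is popped/linked and
    // the lower half [start, medianIndex) is built.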
    append(BranchCode(LessThanToPush, medianIndex));
    build(medianIndex, true, end);
    append(BranchCode(Pop));
    build(start, hardStart, medianIndex);
}

void BinarySwitch::Case::dump(PrintStream& out) const
{
    out.print("<value: ", value, ", index: ", index, ">");
}

void BinarySwitch::BranchCode::dump(PrintStream& out) const
{
    switch (kind) {
    case NotEqualToFallThrough:
        out.print("NotEqualToFallThrough");
        break;
    case NotEqualToPush:
        out.print("NotEqualToPush");
        break;
    case LessThanToPush:
        out.print("LessThanToPush");
        break;
    case Pop:
        out.print("Pop");
        break;
    case ExecuteCase:
        out.print("ExecuteCase");
        break;
    }

    if (index != UINT_MAX)
        out.print("(", index, ")");
}

} // namespace JSC

#endif // ENABLE(JIT)