Changeset 179490 in webkit for trunk/Source/JavaScriptCore/jit/BinarySwitch.cpp
- Timestamp: Feb 2, 2015, 12:52:01 PM
- File: 1 edited
Legend:
- Unmodified (no prefix)
- Added (+)
- Removed (-)
trunk/Source/JavaScriptCore/jit/BinarySwitch.cpp
r179223 r179490 33 33 namespace JSC { 34 34 35 static unsigned globalCounter; // We use a different seed every time we are invoked. 36 35 37 BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type) 36 38 : m_value(value) 39 , m_weakRandom(globalCounter++) 37 40 , m_index(0) 38 41 , m_caseIndex(UINT_MAX) 39 , m_medianBias(0)40 42 , m_type(type) 41 43 { … … 46 48 m_cases.append(Case(cases[i], i)); 47 49 std::sort(m_cases.begin(), m_cases.end()); 48 build(0, m_cases.size()); 50 build(0, false, m_cases.size()); 51 } 52 53 BinarySwitch::~BinarySwitch() 54 { 49 55 } 50 56 … … 116 122 } 117 123 118 void BinarySwitch::build(unsigned start, unsigned end)124 void BinarySwitch::build(unsigned start, bool hardStart, unsigned end) 119 125 { 120 126 unsigned size = end - start; 121 127 122 switch (size) { 123 case 0: { 124 RELEASE_ASSERT_NOT_REACHED(); 125 break; 128 RELEASE_ASSERT(size); 129 130 // This code uses some random numbers to keep things balanced. It's important to keep in mind 131 // that this does not improve average-case throughput under the assumption that all cases fire 132 // with equal probability. It just ensures that there will not be some switch structure that 133 // when combined with some input will always produce pathologically good or pathologically bad 134 // performance. 135 136 const unsigned leafThreshold = 3; 137 138 if (size <= leafThreshold) { 139 // It turns out that for exactly three cases or less, it's better to just compare each 140 // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in 141 // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches. 142 // 143 // This assumes that we care about the cost of hitting some case more than we care about 144 // bottoming out in a default case. I believe that in most places where we use switch 145 // statements, we are more likely to hit one of the cases than we are to fall through to 146 // default. Intuitively, if we wanted to improve the performance of default, we would 147 // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion. 148 149 bool allConsecutive = false; 150 151 if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1)) 152 && start + size < m_cases.size() 153 && m_cases[start + size - 1].value == m_cases[start + size].value - 1) { 154 allConsecutive = true; 155 for (unsigned i = 0; i < size - 1; ++i) { 156 if (m_cases[i].value + 1 != m_cases[i + 1].value) { 157 allConsecutive = false; 158 break; 159 } 160 } 161 } 162 163 Vector<unsigned, 3> localCaseIndices; 164 for (unsigned i = 0; i < size; ++i) 165 localCaseIndices.append(start + i); 166 167 std::random_shuffle( 168 localCaseIndices.begin(), localCaseIndices.end(), 169 [this] (unsigned n) { 170 // We use modulo to get a random number in the range we want fully knowing that 171 // this introduces a tiny amount of bias, but we're fine with such tiny bias. 
+                return m_weakRandom.getUint32() % n;
+            });
+
+        for (unsigned i = 0; i < size - 1; ++i) {
+            m_branches.append(BranchCode(NotEqualToPush, localCaseIndices[i]));
+            m_branches.append(BranchCode(ExecuteCase, localCaseIndices[i]));
+            m_branches.append(BranchCode(Pop));
+        }
+
+        if (!allConsecutive)
+            m_branches.append(BranchCode(NotEqualToFallThrough, localCaseIndices.last()));
+
+        m_branches.append(BranchCode(ExecuteCase, localCaseIndices.last()));
+        return;
     }
 
-    case 1: {
-        if (start
-            && m_cases[start - 1].value == m_cases[start].value - 1
-            && start + 1 < m_cases.size()
-            && m_cases[start + 1].value == m_cases[start].value + 1) {
-            m_branches.append(BranchCode(ExecuteCase, start));
-            break;
-        }
-
-        m_branches.append(BranchCode(NotEqualToFallThrough, start));
-        m_branches.append(BranchCode(ExecuteCase, start));
-        break;
-    }
-
-    case 2: {
-        if (m_cases[start].value + 1 == m_cases[start + 1].value
-            && start
-            && m_cases[start - 1].value == m_cases[start].value - 1
-            && start + 2 < m_cases.size()
-            && m_cases[start + 2].value == m_cases[start + 1].value + 1) {
-            m_branches.append(BranchCode(NotEqualToPush, start));
-            m_branches.append(BranchCode(ExecuteCase, start));
-            m_branches.append(BranchCode(Pop));
-            m_branches.append(BranchCode(ExecuteCase, start + 1));
-            break;
-        }
-
-        unsigned firstCase = start;
-        unsigned secondCase = start + 1;
-        if (m_medianBias)
-            std::swap(firstCase, secondCase);
-        m_medianBias ^= 1;
-
-        m_branches.append(BranchCode(NotEqualToPush, firstCase));
-        m_branches.append(BranchCode(ExecuteCase, firstCase));
-        m_branches.append(BranchCode(Pop));
-        m_branches.append(BranchCode(NotEqualToFallThrough, secondCase));
-        m_branches.append(BranchCode(ExecuteCase, secondCase));
-        break;
-    }
-
-    default: {
-        unsigned medianIndex = (start + end) / 2;
-        if (!(size & 1)) {
-            // Because end is exclusive, in the even case, this rounds up by
-            // default. Hence median bias sometimes flips to subtracting one
-            // in order to get round-down behavior.
-            medianIndex -= m_medianBias;
-            m_medianBias ^= 1;
-        }
-
-        RELEASE_ASSERT(medianIndex > start);
-        RELEASE_ASSERT(medianIndex + 1 < end);
-
-        m_branches.append(BranchCode(LessThanToPush, medianIndex));
-        m_branches.append(BranchCode(NotEqualToPush, medianIndex));
-        m_branches.append(BranchCode(ExecuteCase, medianIndex));
-
-        m_branches.append(BranchCode(Pop));
-        build(medianIndex + 1, end);
-
-        m_branches.append(BranchCode(Pop));
-        build(start, medianIndex);
-        break;
-    } }
+    // There are two different strategies we could consider here:
+    //
+    // Isolate median and split: pick a median and check if the comparison value is equal to it;
+    // if so, execute the median case. Otherwise check if the value is less than the median, and
+    // recurse left or right based on this. This has two subvariants: we could either first test
+    // equality for the median and then do the less-than, or we could first do the less-than and
+    // then check equality on the not-less-than path.
+    //
+    // Ignore median and split: do a less-than comparison on a value that splits the cases in two
+    // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
+    // against the median (or anything else); let the recursion handle those equality comparisons
+    // once we bottom out in a list that has 3 cases or less (see above).
+    //
+    // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
+    // would be faster since it leads to less branching for some lucky cases. It turns out that
+    // Isolate is almost a total fail in the average, assuming all cases are equally likely. How
+    // bad Isolate is depends on whether you believe that doing two consecutive branches based on
+    // the same comparison is cheaper than doing the compare/branches separately. This is
+    // difficult to evaluate. For small immediates that aren't blinded, we just care about
+    // avoiding a second compare instruction. For large immediates or when blinding is in play, we
+    // also care about the instructions used to materialize the immediate a second time. Isolate
+    // can help with both costs since it involves first doing a < compare+branch on some value,
+    // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
+    // compare+branch on some value, and then the == compare+branch on that same value will happen
+    // much later.
+    //
+    // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
+    // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
+    // of a compare+branch on some value that you've just done another compare+branch for. These
+    // recurrence relations compute the total cost incurred if you executed the switch statement
+    // on each matching value. So the average cost of hitting some case can be computed as
+    // Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
+    //
+    // Isolate[1] = ComparisonCost
+    // Isolate[2] = (2 + 1) * ComparisonCost
+    // Isolate[3] = (3 + 2 + 1) * ComparisonCost
+    // Isolate[n_] := With[
+    //     {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
+    //     ComparisonCost + ChainedComparisonCost +
+    //     (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) +
+    //     (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]
+    //
+    // Ignore[1] = ComparisonCost
+    // Ignore[2] = (2 + 1) * ComparisonCost
+    // Ignore[3] = (3 + 2 + 1) * ComparisonCost
+    // Ignore[n_] := With[
+    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
+    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
+    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
+    //
+    // This does not account for the average cost of hitting the default case. See further below
+    // for a discussion of that.
+    //
+    // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
+    // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
+    // switch statements that have 20 cases or fewer, though the margin of victory is never large
+    // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
+    // we see divergence between the two with Ignore winning. This is of course rather
+    // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
+    // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
+    // divergence for large switches with Ignore winning, for example if a switch statement has
+    // 100 cases then Ignore saves one branch on average.
+    //
+    // Our current JIT backends don't provide for optimization for chained comparisons, except for
+    // reducing the code for materializing the immediate if the immediates are large or blinding
+    // comes into play. Probably our JIT backends live somewhere north of
+    // ChainedComparisonCost = 0.5.
+    //
+    // This implies that using the Ignore strategy is likely better. If we wanted to incorporate
+    // the Isolate strategy, we'd want to determine the switch size threshold at which the two
+    // cross over and then use Isolate for switches that are smaller than that size.
+    //
+    // The average cost of hitting the default case is similar, but involves a different cost for
+    // the base cases: you have to assume that you will always fail each branch. For the Ignore
+    // strategy we would get this recurrence relation; the same kind of thing happens to the
+    // Isolate strategy:
+    //
+    // Ignore[1] = ComparisonCost
+    // Ignore[2] = (2 + 2) * ComparisonCost
+    // Ignore[3] = (3 + 3 + 3) * ComparisonCost
+    // Ignore[n_] := With[
+    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
+    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
+    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
+    //
+    // This means that if we cared about the default case more, we would likely reduce
+    // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
+    // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
+    // increase the average cost of taking one of the non-default cases by 1/3. Typically the
+    // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
+    // that the default case is more important then we would want leafThreshold to be 2, and the
+    // default case would become 1/6 faster on average. But we believe that most switch statements
+    // are more likely to take one of the cases than the default, so we use leafThreshold = 3
+    // and get a 1/6 speed-up on average for taking an explicit case.
+
+    unsigned medianIndex = (start + end) / 2;
+
+    // We want medianIndex to point to the thing we will do a less-than compare against. We want
+    // this less-than compare to split the current sublist into equal-sized sublists, or
+    // nearly-equal-sized with some randomness if we're in the odd case. With the above
+    // calculation, in the odd case we will have medianIndex pointing at either the element we
+    // want or the element to the left of the one we want. Consider the case of five elements:
+    //
+    //     0 1 2 3 4
+    //
+    // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
+    // value < 2, then we will split the list into 2 elements on the left and three on the right.
+    // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
+    // that we don't become unbalanced on the right. This does not improve throughput since one
+    // side will always get shafted, and that side might still be odd, in which case it will also
+    // have two sides and one of them will get shafted - and so on. We just want to avoid
+    // deterministic pathologies.
+    //
+    // In the even case, we will always end up pointing at the element we want:
+    //
+    //     0 1 2 3
+    //
+    // start will be 0, end will be 4. So, the average is 2, which is what we'd like.
+    if (size & 1) {
+        RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex);
+        medianIndex += m_weakRandom.getUint32() & 1;
+    } else
+        RELEASE_ASSERT(medianIndex - start == end - medianIndex);
+
+    RELEASE_ASSERT(medianIndex > start);
+    RELEASE_ASSERT(medianIndex + 1 < end);
+
+    m_branches.append(BranchCode(LessThanToPush, medianIndex));
+    build(medianIndex, true, end);
+    m_branches.append(BranchCode(Pop));
+    build(start, hardStart, medianIndex);
 }
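
A concrete picture of what the new leaf path (size <= leafThreshold) emits may help. The trace below is hypothetical: the sublist, case indices, and shuffle order are invented for illustration, but the opcode names are the BranchCode kinds used in the diff:

    // Hypothetical leaf emission for a non-consecutive 3-case sublist covering
    // m_cases[4..6], assuming the shuffle yields the index order 5, 4, 6:
    NotEqualToPush(5)         // value != m_cases[5].value: push a branch, linked at the next Pop
    ExecuteCase(5)            // value matched: jump to case 5's code
    Pop                       // link the pushed not-equal branch to the code that follows
    NotEqualToPush(4)
    ExecuteCase(4)
    Pop
    NotEqualToFallThrough(6)  // a miss on the last case exits to the default/fall-through
    ExecuteCase(6)            // if allConsecutive were true, the NotEqualToFallThrough above
                              // would be omitted: the value is already known to be in range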
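
The Isolate/Ignore recurrences in the comment can also be checked numerically. Here is a standalone C++ sketch (not part of the changeset; the function names and the printed sizes are invented) that computes the expected per-case costs Isolate[n]/n and Ignore[n]/n by averaging over the RandomInteger[] coin flips:

    // Sketch (not from the changeset): evaluates the Isolate/Ignore recurrences,
    // averaging over the random median choice, to get expected cost per taken case.
    #include <cstdio>
    #include <map>

    static const double comparisonCost = 1.0;
    static const double chainedComparisonCost = 0.5; // Try 0, 0.5, and 1.

    static double isolate(unsigned n);
    static double ignore(unsigned n);

    // Total Isolate cost for a given median choice.
    static double isolateAt(unsigned n, unsigned medianIndex)
    {
        return comparisonCost + chainedComparisonCost
            + comparisonCost * (medianIndex - 1) + isolate(medianIndex - 1)
            + 2 * comparisonCost * (n - medianIndex) + isolate(n - medianIndex);
    }

    static double isolate(unsigned n)
    {
        if (n <= 3)
            return n * (n + 1) / 2 * comparisonCost; // Base cases: 1, 3, 6 comparisons.
        static std::map<unsigned, double> memo;
        auto it = memo.find(n);
        if (it != memo.end())
            return it->second;
        // Even n flips a coin between floor(n/2) and floor(n/2) + 1; odd n uses floor(n/2) + 1.
        double result = (n & 1)
            ? isolateAt(n, n / 2 + 1)
            : (isolateAt(n, n / 2) + isolateAt(n, n / 2 + 1)) / 2;
        memo[n] = result;
        return result;
    }

    // Total Ignore cost for a given median choice.
    static double ignoreAt(unsigned n, unsigned medianIndex)
    {
        return medianIndex * comparisonCost + ignore(medianIndex)
            + (n - medianIndex) * comparisonCost + ignore(n - medianIndex);
    }

    static double ignore(unsigned n)
    {
        if (n <= 3)
            return n * (n + 1) / 2 * comparisonCost;
        static std::map<unsigned, double> memo;
        auto it = memo.find(n);
        if (it != memo.end())
            return it->second;
        // Odd n flips a coin between floor(n/2) and floor(n/2) + 1; even n uses n/2.
        double result = (n & 1)
            ? (ignoreAt(n, n / 2) + ignoreAt(n, n / 2 + 1)) / 2
            : ignoreAt(n, n / 2);
        memo[n] = result;
        return result;
    }

    int main()
    {
        for (unsigned n : { 5u, 10u, 20u, 50u, 100u }) {
            std::printf("n = %3u: Isolate/n = %.3f, Ignore/n = %.3f\n",
                n, isolate(n) / n, ignore(n) / n);
        }
        return 0;
    }

Varying chainedComparisonCost between 0 and 1 is what lets you reproduce the crossover behavior the comment describes (Isolate winning for small switches when chained comparisons are cheap, Ignore winning everywhere once they are not).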
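
Finally, a minimal standalone sketch of the randomized median pick at the bottom of build(); std::rand() stands in for JSC's WeakRandom, and the sublist bounds are made up:

    #include <cstdio>
    #include <cstdlib>

    static void pickMedian(unsigned start, unsigned end)
    {
        unsigned size = end - start;
        unsigned medianIndex = (start + end) / 2;
        if (size & 1)
            medianIndex += std::rand() & 1; // Odd sublist: split 2|3 or 3|2 at random.
        std::printf("[%u, %u): less-than compare at index %u -> left [%u, %u), right [%u, %u)\n",
            start, end, medianIndex, start, medianIndex, medianIndex, end);
    }

    int main()
    {
        pickMedian(0, 5); // Odd case from the comment: medianIndex is 2 or 3.
        pickMedian(0, 4); // Even case: medianIndex is always 2.
        return 0;
    }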
Note: See TracChangeset for help on using the changeset viewer.