source: webkit/trunk/Source/JavaScriptCore/dfg/DFGCSEPhase.cpp

Last change on this file was 265775, checked in by [email protected], 5 years ago

Have an OOB+SaneChain Array::Speculation
https://bugs.webkit.org/show_bug.cgi?id=215487

Reviewed by Yusuke Suzuki.

JSTests:

  • microbenchmarks/oob-sane-chain-contiguous.js: Added.
  • microbenchmarks/oob-sane-chain-double-read-undefined.js: Added.
  • microbenchmarks/oob-sane-chain-double.js: Added.
  • microbenchmarks/oob-sane-chain-int32.js: Added.
  • stress/oob-sane-chain-negative-index.js: Added.

Source/JavaScriptCore:

This patch adds a new ArrayMode speculation in the DFG/FTL called OutOfBoundsSaneChain.
It allows us to do fast things when we go OOB, like simply returning undefined. This is
because we install watchpoints on the prototype chain to ensure its objects have no
indexed properties. This patch implements OutOfBoundsSaneChain on GetByVal over
Int32/Double/Contiguous original JS arrays. We can extend it in the future to non-original
JS arrays if we prove that their prototype is Array.prototype. To implement this properly,
we also need to ensure that the index isn't negative, as Array.prototype/Object.prototype
may have negative indexed accessors. We do this via speculation, and if we ever recompile
and see an exit because of this, we stop speculating OutOfBoundsSaneChain.
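
The guarded fast path boils down to three checks. The following stand-alone C++ sketch only
illustrates that shape (the real thing is IR emitted by the DFG/FTL backends; the type and
function names below are made up for the example):

    #include <cstdint>
    #include <vector>

    struct DeoptException { };                       // stands in for an OSR exit
    struct Value { double number; bool isUndefined; };

    static Value undefinedValue() { return { 0.0, true }; }

    // Shape of a GetByVal on a Double array under OutOfBoundsSaneChain: negative indices must
    // deopt because Array.prototype/Object.prototype could have negative indexed accessors,
    // while a non-negative OOB index can simply produce undefined thanks to the prototype-chain
    // watchpoints.
    Value getByValDoubleOutOfBoundsSaneChain(const std::vector<double>& storage, int32_t index)
    {
        if (index < 0)
            throw DeoptException();                  // speculation failure
        if (static_cast<size_t>(index) >= storage.size())
            return undefinedValue();                 // OOB: no prototype walk needed
        return { storage[static_cast<size_t>(index)], false };
    }

    int main()
    {
        std::vector<double> a { 1.5, 2.5 };
        Value inBounds = getByValDoubleOutOfBoundsSaneChain(a, 1);  // 2.5
        Value oob = getByValDoubleOutOfBoundsSaneChain(a, 10);      // undefined
        return (inBounds.isUndefined || !oob.isUndefined) ? 1 : 0;
    }

If the negative-index speculation ever exits, we stop choosing OutOfBoundsSaneChain on the next
compile, as described above.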

This is about 20% faster on crypto-md5-SP, and ~3-4x faster on the microbenchmarks I created.

  • dfg/DFGAbstractInterpreterInlines.h:

(JSC::DFG::AbstractInterpreter<AbstractStateType>::executeEffects):

  • dfg/DFGArrayMode.cpp:

(JSC::DFG::ArrayMode::refine const):
(JSC::DFG::arraySpeculationToString):

  • dfg/DFGArrayMode.h:

(JSC::DFG::ArrayMode::isInBoundsSaneChain const):
(JSC::DFG::ArrayMode::isOutOfBoundsSaneChain const):
(JSC::DFG::ArrayMode::isOutOfBounds const):
(JSC::DFG::ArrayMode::isEffectfulOutOfBounds const):
(JSC::DFG::ArrayMode::isInBounds const):
(JSC::DFG::ArrayMode::isSaneChain const): Deleted.

  • dfg/DFGCSEPhase.cpp:
  • dfg/DFGClobberize.h:

(JSC::DFG::clobberize):

  • dfg/DFGFixupPhase.cpp:

(JSC::DFG::FixupPhase::fixupNode):
(JSC::DFG::FixupPhase::checkArray):
(JSC::DFG::FixupPhase::setSaneChainIfPossible):
(JSC::DFG::FixupPhase::convertToHasIndexedProperty):

  • dfg/DFGSpeculativeJIT.cpp:

(JSC::DFG::SpeculativeJIT::compileGetByValOnString):
(JSC::DFG::SpeculativeJIT::compileHasIndexedProperty):

  • dfg/DFGSpeculativeJIT32_64.cpp:

(JSC::DFG::SpeculativeJIT::compile):

  • dfg/DFGSpeculativeJIT64.cpp:

(JSC::DFG::SpeculativeJIT::compile):

  • dfg/DFGValidate.cpp:
  • ftl/FTLLowerDFGToB3.cpp:

(JSC::FTL::DFG::LowerDFGToB3::compileGetByVal):
(JSC::FTL::DFG::LowerDFGToB3::compileStringCharAt):
(JSC::FTL::DFG::LowerDFGToB3::compileHasIndexedProperty):

File size: 36.7 KB
/*
 * Copyright (C) 2011-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFGCSEPhase.h"

#if ENABLE(DFG_JIT)

#include "ButterflyInlines.h"
#include "DFGAbstractHeap.h"
#include "DFGBlockMapInlines.h"
#include "DFGClobberSet.h"
#include "DFGClobberize.h"
#include "DFGDominators.h"
#include "DFGGraph.h"
#include "DFGPhase.h"

namespace JSC { namespace DFG {

// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.
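//
// For example, given two loads of the same named property (illustrative IR, not an exact
// graph dump):
//
//     a: GetByOffset(@object, id0)
//     b: GetByOffset(@object, id0)
//
// LocalCSE replaces @b with @a when both sit in the same basic block with no clobbering write
// in between, while GlobalCSE also handles the case where @a sits in a block that dominates
// @b's block and no path between the two writes to an overlapping abstract heap.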

namespace {

namespace DFGCSEPhaseInternal {
static constexpr bool verbose = false;
}

class ImpureDataSlot {
    WTF_MAKE_NONCOPYABLE(ImpureDataSlot);
    WTF_MAKE_FAST_ALLOCATED;
public:
    ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash)
        : key(key), value(value), hash(hash)
    { }

    HeapLocation key;
    LazyNode value;
    unsigned hash;
};
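
// Note: "hash" caches the HeapLocation's hash so that ImpureDataSlotHash below can return it
// without recomputing it on every lookup or rehash. Slots that only ever live in ImpureMap's
// m_abstractHeapStackMap are created with hash 0, since they are never stored in a table that
// hashes by slot.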

struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>> {
    static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key)
    {
        return key->hash;
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b)
    {
        // ImpureDataSlots are unique per table per HeapLocation. This lets us compare the key
        // by just comparing the pointers of the unique ImpureDataSlots.
        ASSERT(a != b || a->key == b->key);
        return a == b;
    }
};

struct ImpureDataTranslator {
    static unsigned hash(const HeapLocation& key)
    {
        return key.hash();
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key)
    {
        if (!slot)
            return false;
        if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot))
            return false;
        return slot->key == key;
    }

    static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode)
    {
        new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode});
    }
};
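
// ImpureDataTranslator is a WTF hash translator: it lets the HashSet of std::unique_ptr<ImpureDataSlot>
// be probed directly with a HeapLocation. find<ImpureDataTranslator>(location) hashes the location and
// compares it against slot->key, and add<ImpureDataTranslator>(location) only constructs a fresh
// ImpureDataSlot (via translate()) when no matching slot exists yet.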

class ImpureMap {
    WTF_MAKE_FAST_ALLOCATED;
    WTF_MAKE_NONCOPYABLE(ImpureMap);
public:
    ImpureMap() = default;

    ImpureMap(ImpureMap&& other)
    {
        m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap);
        m_fallbackStackMap.swap(other.m_fallbackStackMap);
        m_heapMap.swap(other.m_heapMap);
#if !defined(NDEBUG)
        m_debugImpureData.swap(other.m_debugImpureData);
#endif
    }

    const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node)
    {
        const ImpureDataSlot* result = addImpl(location, node);

#if !defined(NDEBUG)
        auto addResult = m_debugImpureData.add(location, node);
        ASSERT(!!result == !addResult.isNewEntry);
#endif
        return result;
    }

    LazyNode get(const HeapLocation& location) const
    {
        LazyNode result = getImpl(location);
#if !defined(NDEBUG)
        ASSERT(result == m_debugImpureData.get(location));
#endif
        return result;
    }

    void clobber(AbstractHeap heap, bool clobberConservatively)
    {
        switch (heap.kind()) {
        case World: {
            clear();
            break;
        }
        case SideState:
            break;
        case Stack: {
            ASSERT(!heap.payload().isTop());
            m_abstractHeapStackMap.remove(heap.payload().value());
            if (clobberConservatively)
                m_fallbackStackMap.clear();
            else
                clobber(m_fallbackStackMap, heap);
            break;
        }
        default:
            if (clobberConservatively)
                m_heapMap.clear();
            else
                clobber(m_heapMap, heap);
            break;
        }
#if !defined(NDEBUG)
        m_debugImpureData.removeIf([heap, clobberConservatively, this](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool {
            switch (heap.kind()) {
            case World:
            case SideState:
                break;
            case Stack: {
                if (!clobberConservatively)
                    break;
                if (pair.key.heap().kind() == Stack) {
                    auto iterator = m_abstractHeapStackMap.find(pair.key.heap().payload().value());
                    if (iterator != m_abstractHeapStackMap.end() && iterator->value->key == pair.key)
                        return false;
                    return true;
                }
                break;
            }
            default: {
                if (!clobberConservatively)
                    break;
                AbstractHeapKind kind = pair.key.heap().kind();
                if (kind != World && kind != SideState && kind != Stack)
                    return true;
                break;
            }
            }
            return heap.overlaps(pair.key.heap());
        });
        ASSERT(m_debugImpureData.size()
            == (m_heapMap.size()
                + m_abstractHeapStackMap.size()
                + m_fallbackStackMap.size()));

        const bool verifyClobber = false;
        if (verifyClobber) {
            for (auto& pair : m_debugImpureData)
                ASSERT(!!get(pair.key));
        }
#endif
    }

    void clear()
    {
        m_abstractHeapStackMap.clear();
        m_fallbackStackMap.clear();
        m_heapMap.clear();
#if !defined(NDEBUG)
        m_debugImpureData.clear();
#endif
    }

private:
    typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map;

    const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node)
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            AbstractHeap abstractHeap = location.heap();
            if (abstractHeap.payload().isTop())
                return add(m_fallbackStackMap, location, node);
            auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value(), nullptr);
            if (addResult.isNewEntry) {
                addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0});
                return nullptr;
            }
            if (addResult.iterator->value->key == location)
                return addResult.iterator->value.get();
            return add(m_fallbackStackMap, location, node);
        }
        default:
            return add(m_heapMap, location, node);
        }
        return nullptr;
    }

    LazyNode getImpl(const HeapLocation& location) const
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value());
            if (iterator != m_abstractHeapStackMap.end()
                && iterator->value->key == location)
                return iterator->value->value;
            return get(m_fallbackStackMap, location);
        }
        default:
            return get(m_heapMap, location);
        }
        return LazyNode();
    }

    static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node)
    {
        auto result = map.add<ImpureDataTranslator>(location);
        if (result.isNewEntry) {
            (*result.iterator)->value = node;
            return nullptr;
        }
        return result.iterator->get();
    }

    static LazyNode get(const Map& map, const HeapLocation& location)
    {
        auto iterator = map.find<ImpureDataTranslator>(location);
        if (iterator != map.end())
            return (*iterator)->value;
        return LazyNode();
    }

    static void clobber(Map& map, AbstractHeap heap)
    {
        map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool {
            return heap.overlaps(slot->key.heap());
        });
    }

    // The majority of impure stack slots are unique per value.
    // This is very useful for fast clobber(): we can just remove the slot addressed by the
    // AbstractHeap in O(1).
    //
    // When there are conflicts, any additional HeapLocation is added to the fallback map.
    // This works well because fallbackStackMap remains tiny.
    //
    // One cannot assume a unique ImpureData is in m_abstractHeapStackMap. It may have been
    // a duplicate in the past and now lives only in m_fallbackStackMap.
    //
    // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value.
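    //
    // Concretely: if a given stack slot (say loc42) has only ever been def()'d through one
    // HeapLocation, that slot lives in m_abstractHeapStackMap keyed by loc42's payload, and
    // clobbering Stack(loc42) is a single hash removal. Only when a second, different
    // HeapLocation shows up for loc42 does it spill into m_fallbackStackMap, which clobber()
    // then has to scan.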
    HashMap<int64_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int64_t>, WTF::SignedWithZeroKeyHashTraits<int64_t>> m_abstractHeapStackMap;
    Map m_fallbackStackMap;

    Map m_heapMap;

#if !defined(NDEBUG)
    HashMap<HeapLocation, LazyNode> m_debugImpureData;
#endif
};

class LocalCSEPhase : public Phase {
public:
    LocalCSEPhase(Graph& graph)
        : Phase(graph, "local common subexpression elimination")
        , m_smallBlock(graph)
        , m_largeBlock(graph)
        , m_hugeBlock(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);

        bool changed = false;

        m_graph.clearReplacements();

        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;

            if (block->size() <= SmallMaps::capacity)
                changed |= m_smallBlock.run(block);
            else if (block->size() <= Options::maxDFGNodesInBasicBlockForPreciseAnalysis())
                changed |= m_largeBlock.run(block);
            else
                changed |= m_hugeBlock.run(block);
        }

        return changed;
    }

private:
    class SmallMaps {
    public:
        // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
        // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
        // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
        // in one of these "small" list-based maps. That number still seems largeish, except that
        // the overhead of HashMaps can be quite high currently: clearing them, or even removing
        // enough things from them, deletes (or resizes) their backing store eagerly. Hence
        // HashMaps induce a lot of malloc traffic.
        static constexpr unsigned capacity = 100;

        SmallMaps()
            : m_pureLength(0)
            , m_impureLength(0)
        {
        }

        void clear()
        {
            m_pureLength = 0;
            m_impureLength = 0;
        }

        void write(AbstractHeap heap)
        {
            if (heap.kind() == SideState)
                return;

            for (unsigned i = 0; i < m_impureLength; ++i) {
                // Unordered removal: overwrite slot i with the last live entry, shrink the map,
                // and then re-examine index i (hence the i--) since it now holds a different pair.
                if (heap.overlaps(m_impureMap[i].key.heap()))
                    m_impureMap[i--] = m_impureMap[--m_impureLength];
            }
        }

        Node* addPure(PureValue value, Node* node)
        {
            for (unsigned i = m_pureLength; i--;) {
                if (m_pureMap[i].key == value)
                    return m_pureMap[i].value;
            }

            RELEASE_ASSERT(m_pureLength < capacity);
            m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
            return nullptr;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            for (unsigned i = m_impureLength; i--;) {
                if (m_impureMap[i].key == location)
                    return m_impureMap[i].value;
            }
            return nullptr;
        }

        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            // FIXME: If we are using small maps, we must not def() derived values.
            // For now the only derived values we def() are constant-based.
            if (location.index() && !location.index().isNode())
                return nullptr;
            if (LazyNode result = findReplacement(location))
                return result;
            RELEASE_ASSERT(m_impureLength < capacity);
            m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
            return nullptr;
        }

    private:
        WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
        WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
        unsigned m_pureLength;
        unsigned m_impureLength;
    };

    class LargeMaps {
    public:
        LargeMaps()
        {
        }

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = false;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    // This is used only for huge basic blocks. Our usual CSE has quadratic complexity in the number of DFG nodes in a basic block.
    // HugeMaps models clobbering conservatively to avoid that O(N^2) algorithm. In particular, we clear all the slots of the specified heap kind
    // in ImpureMap instead of iterating over the slots and removing each matched slot. This change makes the complexity O(N).
    // FIXME: We can make LargeMaps O(N) without introducing conservative behavior if we track clobbering by hierarchical epochs.
    // https://bugs.webkit.org/show_bug.cgi?id=200014
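    // For example, with conservative clobbering a write() to an ordinary (non-Stack) heap simply
    // clears m_heapMap (see ImpureMap::clobber above) rather than scanning every slot for overlap,
    // trading lost CSE opportunities for linear running time on huge blocks.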
    class HugeMaps {
    public:
        HugeMaps() = default;

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = true;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    template<typename Maps>
    class BlockCSE {
    public:
        BlockCSE(Graph& graph)
            : m_graph(graph)
            , m_insertionSet(graph)
        {
        }

        bool run(BasicBlock* block)
        {
            m_maps.clear();
            m_changed = false;
            m_block = block;

            for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
                m_node = block->at(nodeIndex);
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else {
                    // This rule only makes sense for local CSE, since in SSA form we have already
                    // factored the bounds check out of the PutByVal. It's kind of gross, but we
                    // still have reason to believe that PutByValAlias is a good optimization and
                    // that it's better to do it with a single node rather than separating out the
                    // CheckInBounds.
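                    // For example (roughly), a[i]++ becomes a GetByVal followed by a PutByVal on
                    // the same base and index; the GetByVal def()s that heap location, so the
                    // lookup below finds a replacement and downgrades the PutByVal to
                    // PutByValAlias, which does not need to re-check bounds.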
                    if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
                        HeapLocation heap;

                        Node* base = m_graph.varArgChild(m_node, 0).node();
                        Node* index = m_graph.varArgChild(m_node, 1).node();
                        LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result());

                        ArrayMode mode = m_node->arrayMode();
                        switch (mode.type()) {
                        case Array::Int32:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index);
                            break;

                        case Array::Double: {
                            if (!mode.isInBounds())
                                break;
                            LocationKind kind = mode.isInBoundsSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc;
                            heap = HeapLocation(kind, IndexedDoubleProperties, base, index);
                            break;
                        }

                        case Array::Contiguous:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index);
                            break;

                        case Array::Int8Array:
                        case Array::Int16Array:
                        case Array::Int32Array:
                        case Array::Uint8Array:
                        case Array::Uint8ClampedArray:
                        case Array::Uint16Array:
                        case Array::Uint32Array:
                        case Array::Float32Array:
                        case Array::Float64Array:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                indexedPropertyLoc, TypedArrayProperties, base, index);
                            break;

                        default:
                            break;
                        }

                        if (!!heap && m_maps.findReplacement(heap))
                            m_node->setOp(PutByValAlias);
                    }

                    clobberize(m_graph, m_node, *this);
                }
            }

            m_insertionSet.execute(block);

            return m_changed;
        }

        void read(AbstractHeap) { }

        void write(AbstractHeap heap)
        {
            m_maps.write(heap);
        }

        void def(PureValue value)
        {
            Node* match = m_maps.addPure(value, m_node);
            if (!match)
                return;

            m_node->replaceWith(m_graph, match);
            m_changed = true;
        }

        void def(const HeapLocation& location, const LazyNode& value)
        {
            LazyNode match = m_maps.addImpure(location, value);
            if (!match)
                return;

            if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all uses
                //   of this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.

                m_node->child1() = Edge();
                m_graph.dethread();
            }

            if (value.isNode() && value.asNode() == m_node) {
                match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
                ASSERT(match.isNode());
                m_node->replaceWith(m_graph, match.asNode());
                m_changed = true;
            }
        }

    private:
        Graph& m_graph;

        bool m_changed;
        Node* m_node;
        BasicBlock* m_block;

        Maps m_maps;

        InsertionSet m_insertionSet;
    };

    BlockCSE<SmallMaps> m_smallBlock;
    BlockCSE<LargeMaps> m_largeBlock;
    BlockCSE<HugeMaps> m_hugeBlock;
};

class GlobalCSEPhase : public Phase {
public:
    GlobalCSEPhase(Graph& graph)
        : Phase(graph, "global common subexpression elimination")
        , m_impureDataMap(graph)
        , m_insertionSet(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == SSA);

        m_graph.initializeNodeOwners();
        m_graph.ensureSSADominators();

        m_preOrder = m_graph.blocksInPreOrder();

        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
        for (unsigned i = m_preOrder.size(); i--;) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
                addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
        }

        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.

        bool changed = iterate();

        // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
        // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
        // this, we'd have to first reset all of our state.
        // https://bugs.webkit.org/show_bug.cgi?id=145853

        return changed;
    }

    bool iterate()
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog("Performing iteration.\n");

        m_changed = false;
        m_graph.clearReplacements();

        for (unsigned i = 0; i < m_preOrder.size(); ++i) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            m_writesSoFar.clear();

            if (DFGCSEPhaseInternal::verbose)
                dataLog("Processing block ", *m_block, ":\n");

            for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
                m_nodeIndex = nodeIndex;
                m_node = m_block->at(nodeIndex);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Looking at node ", m_node, ":\n");

                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else
                    clobberize(m_graph, m_node, *this);
            }

            m_insertionSet.execute(m_block);

            m_impureData->didVisit = true;
        }

        return m_changed;
    }

    void read(AbstractHeap) { }

    void write(AbstractHeap heap)
    {
        bool clobberConservatively = false;
        m_impureData->availableAtTail.clobber(heap, clobberConservatively);
        m_writesSoFar.add(heap);
    }

    void def(PureValue value)
    {
        // With pure values we do not have to worry about the possibility of some control flow path
        // clobbering the value. So, we just search for all of the like values that have been
        // computed. We pick one that is in a block that dominates ours. Note that this means that
        // a PureValue will map to a list of nodes, since there may be many places in the control
        // flow graph that compute a value but only one of them that dominates us. We may build up
        // a large list of nodes that compute some value in the case of gnarly control flow. This
        // is probably OK.

        auto result = m_pureValues.add(value, Vector<Node*>());
        if (result.isNewEntry) {
            result.iterator->value.append(m_node);
            return;
        }

        for (unsigned i = result.iterator->value.size(); i--;) {
            Node* candidate = result.iterator->value[i];
            if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                m_node->replaceWith(m_graph, candidate);
                m_changed = true;
                return;
            }
        }

        result.iterator->value.append(m_node);
    }

    LazyNode findReplacement(HeapLocation location)
    {
        // At this instant, our "availableAtTail" reflects the set of things that are available in
        // this block so far. We check this map to find block-local CSE opportunities before doing
        // a global search.
        LazyNode match = m_impureData->availableAtTail.get(location);
        if (!!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Found local match: ", match, "\n");
            return match;
        }

        // If it's not available at this point in the block, and at some prior point in the block
        // we have clobbered this heap location, then there is no point in doing a global search.
        if (m_writesSoFar.overlaps(location.heap())) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Not looking globally because of local clobber: ", m_writesSoFar, "\n");
            return nullptr;
        }

        // This performs a backward search over the control flow graph to find some possible
        // non-local def() that matches our heap location. Such a match is only valid if there does
        // not exist any path from that def() to our block that contains a write() that overlaps
        // our heap. This algorithm looks for both of these things (the matching def and the
        // overlapping writes) in one backwards DFS pass.
        //
        // This starts by looking at the starting block's predecessors, and then it continues along
        // their predecessors. As soon as this finds a possible def() - one that defines the heap
        // location we want while dominating our starting block - it assumes that this one must be
        // the match. It then lets the DFS over predecessors complete, but it doesn't add the
        // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
        // heap, it stops, assuming that there is no possible match. Note that the write() case may
        // trigger before we find a def(), or after. Either way, the write() case causes this
        // function to immediately return nullptr.
        //
        // If the write() is found before we find the def(), then we know that any def() we would
        // find would have a path to us that trips over the write() and hence becomes invalid. This
        // is just a direct outcome of us looking for a def() that dominates us. Given a block A
        // that dominates block B - so that A is the one that would have the def() and B is our
        // starting block - we know that any other block must either be on the path from A to B, or
        // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
        // we already have found a block C that has a write(), then C must be on some path from A
        // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
        // def(), stopping on write() is the right thing to do.
        //
        // Stopping on write() is also the right thing to do after we find the def(). After we find
        // the def(), we don't add that block's predecessors to the search worklist. That means
        // that henceforth the only blocks we will see in the search are blocks on the path from
        // the def() to us. If any such block has a write() that clobbers our heap then we should
        // give up.
        //
        // Hence this graph search algorithm ends up being deceptively simple: any overlapping
        // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.

        Vector<BasicBlock*, 8> worklist;
        Vector<BasicBlock*, 8> seenList;
        BitVector seen;

        for (unsigned i = m_block->predecessors.size(); i--;) {
            BasicBlock* predecessor = m_block->predecessors[i];
            if (!seen.get(predecessor->index)) {
                worklist.append(predecessor);
                seen.set(predecessor->index);
            }
        }

        while (!worklist.isEmpty()) {
            BasicBlock* block = worklist.takeLast();
            seenList.append(block);

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Searching in block ", *block, "\n");
            ImpureBlockData& data = m_impureDataMap[block];

            // We require strict domination because this would only see things in our own block if
            // they came *after* our position in the block. Clearly, while our block dominates
            // itself, the things in the block after us don't dominate us.
            if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" It strictly dominates.\n");
                DFG_ASSERT(m_graph, m_node, data.didVisit);
                DFG_ASSERT(m_graph, m_node, !match);
                match = data.availableAtTail.get(location);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Availability: ", match, "\n");
                if (!!match) {
                    // Don't examine the predecessors of a match. At this point we just want to
                    // establish that other blocks on the path from here to there don't clobber
                    // the location we're interested in.
                    continue;
                }
            }

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Dealing with write set ", data.writes, "\n");
            if (data.writes.overlaps(location.heap())) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Clobbered.\n");
                return nullptr;
            }

            for (unsigned i = block->predecessors.size(); i--;) {
                BasicBlock* predecessor = block->predecessors[i];
                if (!seen.get(predecessor->index)) {
                    worklist.append(predecessor);
                    seen.set(predecessor->index);
                }
            }
        }

        if (!match)
            return nullptr;

        // Cache the results for next time. We cache them both for this block and for all of our
        // predecessors, since even though we've already visited our predecessors, our predecessors
        // probably have successors other than us.
        // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
        // the reduction in compile time would warrant the increase in complexity, though.
        // https://bugs.webkit.org/show_bug.cgi?id=134876
        for (BasicBlock* block : seenList)
            m_impureDataMap[block].availableAtTail.add(location, match);
        m_impureData->availableAtTail.add(location, match);

        return match;
    }

    void def(HeapLocation location, LazyNode value)
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got heap location def: ", location, " -> ", value, "\n");

        LazyNode match = findReplacement(location);

        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got match: ", match, "\n");

        if (!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Adding at-tail mapping: ", location, " -> ", value, "\n");
            auto result = m_impureData->availableAtTail.add(location, value);
            ASSERT_UNUSED(result, !result);
            return;
        }

        if (value.isNode() && value.asNode() == m_node) {
            if (!match.isNode()) {
                // We need to properly record the constant in order to use an existing one if applicable.
                // This ensures that re-running GCSE will not find new optimizations.
                match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
                auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
                bool replaced = false;
                if (!result.isNewEntry) {
                    for (unsigned i = result.iterator->value.size(); i--;) {
                        Node* candidate = result.iterator->value[i];
                        if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                            ASSERT(candidate);
                            match->replaceWith(m_graph, candidate);
                            match.setNode(candidate);
                            replaced = true;
                            break;
                        }
                    }
                }
                if (!replaced)
                    result.iterator->value.append(match.asNode());
            }
            ASSERT(match.asNode());
            m_node->replaceWith(m_graph, match.asNode());
            m_changed = true;
        }
    }

    struct ImpureBlockData {
        ImpureBlockData()
            : didVisit(false)
        {
        }

        ClobberSet writes;
        ImpureMap availableAtTail;
        bool didVisit;
    };

    Vector<BasicBlock*> m_preOrder;

    PureMultiMap m_pureValues;
    BlockMap<ImpureBlockData> m_impureDataMap;

    BasicBlock* m_block;
    Node* m_node;
    unsigned m_nodeIndex;
    ImpureBlockData* m_impureData;
    ClobberSet m_writesSoFar;
    InsertionSet m_insertionSet;

    bool m_changed;
};

} // anonymous namespace

bool performLocalCSE(Graph& graph)
{
    return runPhase<LocalCSEPhase>(graph);
}

bool performGlobalCSE(Graph& graph)
{
    return runPhase<GlobalCSEPhase>(graph);
}

} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT)