source: webkit/trunk/Source/JavaScriptCore/dfg/DFGCSEPhase.cpp

Last change on this file was 265775, checked in by [email protected], 5 years ago

Have an OOB+SaneChain Array::Speculation
https://bugs.webkit.org/show_bug.cgi?id=215487

Reviewed by Yusuke Suzuki.

JSTests:

  • microbenchmarks/oob-sane-chain-contiguous.js: Added.
  • microbenchmarks/oob-sane-chain-double-read-undefined.js: Added.
  • microbenchmarks/oob-sane-chain-double.js: Added.
  • microbenchmarks/oob-sane-chain-int32.js: Added.
  • stress/oob-sane-chain-negative-index.js: Added.

Source/JavaScriptCore:

This patch adds a new ArrayMode speculation in the DFG/FTL called OutOfBoundsSaneChain.
It allows us to do fast things when we go OOB, like simply returning undefined. This is
because we install watchpoints on the prototype chain to ensure its objects have no
indexed properties. This patch implements OutOfBoundsSaneChain on GetByVal over
Int32/Double/Contiguous original JS arrays. We can extend it in the future to non-original
JS arrays if we prove that their prototype is Array.prototype. To implement this properly,
we also need to ensure that the index isn't negative, as Array.prototype/Object.prototype
may have negative indexed accessors. We do this via speculation, and if we ever recompile
and see an exit because of this, we stop speculating OutOfBoundsSaneChain.
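
The guarded fast path boils down to three checks. The following stand-alone C++ sketch only
illustrates that shape (the real thing is IR emitted by the DFG/FTL backends; the type and
function names below are made up for the example):

    #include <cstdint>
    #include <vector>

    struct DeoptException { };                       // stands in for an OSR exit
    struct Value { double number; bool isUndefined; };

    static Value undefinedValue() { return { 0.0, true }; }

    // Shape of a GetByVal on a Double array under OutOfBoundsSaneChain: negative indices must
    // deopt because Array.prototype/Object.prototype could have negative indexed accessors,
    // while a non-negative OOB index can simply produce undefined thanks to the prototype-chain
    // watchpoints.
    Value getByValDoubleOutOfBoundsSaneChain(const std::vector<double>& storage, int32_t index)
    {
        if (index < 0)
            throw DeoptException();                  // speculation failure
        if (static_cast<size_t>(index) >= storage.size())
            return undefinedValue();                 // OOB: no prototype walk needed
        return { storage[static_cast<size_t>(index)], false };
    }

    int main()
    {
        std::vector<double> a { 1.5, 2.5 };
        Value inBounds = getByValDoubleOutOfBoundsSaneChain(a, 1);  // 2.5
        Value oob = getByValDoubleOutOfBoundsSaneChain(a, 10);      // undefined
        return (inBounds.isUndefined || !oob.isUndefined) ? 1 : 0;
    }

If the negative-index speculation ever exits, we stop choosing OutOfBoundsSaneChain on the next
compile, as described above.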

This is about 20% faster on crypto-md5-SP, and ~3-4x faster on the microbenchmarks I created.

  • dfg/DFGAbstractInterpreterInlines.h:

(JSC::DFG::AbstractInterpreter<AbstractStateType>::executeEffects):

  • dfg/DFGArrayMode.cpp:

(JSC::DFG::ArrayMode::refine const):
(JSC::DFG::arraySpeculationToString):

  • dfg/DFGArrayMode.h:

(JSC::DFG::ArrayMode::isInBoundsSaneChain const):
(JSC::DFG::ArrayMode::isOutOfBoundsSaneChain const):
(JSC::DFG::ArrayMode::isOutOfBounds const):
(JSC::DFG::ArrayMode::isEffectfulOutOfBounds const):
(JSC::DFG::ArrayMode::isInBounds const):
(JSC::DFG::ArrayMode::isSaneChain const): Deleted.

  • dfg/DFGCSEPhase.cpp:
  • dfg/DFGClobberize.h:

(JSC::DFG::clobberize):

  • dfg/DFGFixupPhase.cpp:

(JSC::DFG::FixupPhase::fixupNode):
(JSC::DFG::FixupPhase::checkArray):
(JSC::DFG::FixupPhase::setSaneChainIfPossible):
(JSC::DFG::FixupPhase::convertToHasIndexedProperty):

  • dfg/DFGSpeculativeJIT.cpp:

(JSC::DFG::SpeculativeJIT::compileGetByValOnString):
(JSC::DFG::SpeculativeJIT::compileHasIndexedProperty):

  • dfg/DFGSpeculativeJIT32_64.cpp:

(JSC::DFG::SpeculativeJIT::compile):

  • dfg/DFGSpeculativeJIT64.cpp:

(JSC::DFG::SpeculativeJIT::compile):

  • dfg/DFGValidate.cpp:
  • ftl/FTLLowerDFGToB3.cpp:

(JSC::FTL::DFG::LowerDFGToB3::compileGetByVal):
(JSC::FTL::DFG::LowerDFGToB3::compileStringCharAt):
(JSC::FTL::DFG::LowerDFGToB3::compileHasIndexedProperty):

File size: 36.7 KB
/*
 * Copyright (C) 2011-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFGCSEPhase.h"

#if ENABLE(DFG_JIT)

#include "ButterflyInlines.h"
#include "DFGAbstractHeap.h"
#include "DFGBlockMapInlines.h"
#include "DFGClobberSet.h"
#include "DFGClobberize.h"
#include "DFGDominators.h"
#include "DFGGraph.h"
#include "DFGPhase.h"

namespace JSC { namespace DFG {

// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.
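//
// For example, given two loads of the same named property (illustrative IR, not an exact
// graph dump):
//
//     a: GetByOffset(@object, id0)
//     b: GetByOffset(@object, id0)
//
// LocalCSE replaces @b with @a when both sit in the same basic block with no clobbering write
// in between, while GlobalCSE also handles the case where @a sits in a block that dominates
// @b's block and no path between the two writes to an overlapping abstract heap.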

namespace {

namespace DFGCSEPhaseInternal {
static constexpr bool verbose = false;
}

class ImpureDataSlot {
    WTF_MAKE_NONCOPYABLE(ImpureDataSlot);
    WTF_MAKE_FAST_ALLOCATED;
public:
    ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash)
        : key(key), value(value), hash(hash)
    { }

    HeapLocation key;
    LazyNode value;
    unsigned hash;
};
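
// Note: "hash" caches the HeapLocation's hash so that ImpureDataSlotHash below can return it
// without recomputing it on every lookup or rehash. Slots that only ever live in ImpureMap's
// m_abstractHeapStackMap are created with hash 0, since they are never stored in a table that
// hashes by slot.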

struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>> {
    static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key)
    {
        return key->hash;
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b)
    {
        // ImpureDataSlots are unique per table per HeapLocation. This lets us compare the key
        // by just comparing the pointers of the unique ImpureDataSlots.
        ASSERT(a != b || a->key == b->key);
        return a == b;
    }
};

struct ImpureDataTranslator {
    static unsigned hash(const HeapLocation& key)
    {
        return key.hash();
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key)
    {
        if (!slot)
            return false;
        if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot))
            return false;
        return slot->key == key;
    }

    static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode)
    {
        new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode});
    }
};
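
// ImpureDataTranslator is a WTF hash translator: it lets the HashSet of std::unique_ptr<ImpureDataSlot>
// be probed directly with a HeapLocation. find<ImpureDataTranslator>(location) hashes the location and
// compares it against slot->key, and add<ImpureDataTranslator>(location) only constructs a fresh
// ImpureDataSlot (via translate()) when no matching slot exists yet.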

class ImpureMap {
    WTF_MAKE_FAST_ALLOCATED;
    WTF_MAKE_NONCOPYABLE(ImpureMap);
public:
    ImpureMap() = default;

    ImpureMap(ImpureMap&& other)
    {
        m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap);
        m_fallbackStackMap.swap(other.m_fallbackStackMap);
        m_heapMap.swap(other.m_heapMap);
#if !defined(NDEBUG)
        m_debugImpureData.swap(other.m_debugImpureData);
#endif
    }

    const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node)
    {
        const ImpureDataSlot* result = addImpl(location, node);

#if !defined(NDEBUG)
        auto addResult = m_debugImpureData.add(location, node);
        ASSERT(!!result == !addResult.isNewEntry);
#endif
        return result;
    }

    LazyNode get(const HeapLocation& location) const
    {
        LazyNode result = getImpl(location);
#if !defined(NDEBUG)
        ASSERT(result == m_debugImpureData.get(location));
#endif
        return result;
    }

    void clobber(AbstractHeap heap, bool clobberConservatively)
    {
        switch (heap.kind()) {
        case World: {
            clear();
            break;
        }
        case SideState:
            break;
        case Stack: {
            ASSERT(!heap.payload().isTop());
            m_abstractHeapStackMap.remove(heap.payload().value());
            if (clobberConservatively)
                m_fallbackStackMap.clear();
            else
                clobber(m_fallbackStackMap, heap);
            break;
        }
        default:
            if (clobberConservatively)
                m_heapMap.clear();
            else
                clobber(m_heapMap, heap);
            break;
        }
#if !defined(NDEBUG)
        m_debugImpureData.removeIf([heap, clobberConservatively, this](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool {
            switch (heap.kind()) {
            case World:
            case SideState:
                break;
            case Stack: {
                if (!clobberConservatively)
                    break;
                if (pair.key.heap().kind() == Stack) {
                    auto iterator = m_abstractHeapStackMap.find(pair.key.heap().payload().value());
                    if (iterator != m_abstractHeapStackMap.end() && iterator->value->key == pair.key)
                        return false;
                    return true;
                }
                break;
            }
            default: {
                if (!clobberConservatively)
                    break;
                AbstractHeapKind kind = pair.key.heap().kind();
                if (kind != World && kind != SideState && kind != Stack)
                    return true;
                break;
            }
            }
            return heap.overlaps(pair.key.heap());
        });
        ASSERT(m_debugImpureData.size()
            == (m_heapMap.size()
                + m_abstractHeapStackMap.size()
                + m_fallbackStackMap.size()));

        const bool verifyClobber = false;
        if (verifyClobber) {
            for (auto& pair : m_debugImpureData)
                ASSERT(!!get(pair.key));
        }
#endif
    }

    void clear()
    {
        m_abstractHeapStackMap.clear();
        m_fallbackStackMap.clear();
        m_heapMap.clear();
#if !defined(NDEBUG)
        m_debugImpureData.clear();
#endif
    }

private:
    typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map;

    const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node)
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            AbstractHeap abstractHeap = location.heap();
            if (abstractHeap.payload().isTop())
                return add(m_fallbackStackMap, location, node);
            auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value(), nullptr);
            if (addResult.isNewEntry) {
                addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0});
                return nullptr;
            }
            if (addResult.iterator->value->key == location)
                return addResult.iterator->value.get();
            return add(m_fallbackStackMap, location, node);
        }
        default:
            return add(m_heapMap, location, node);
        }
        return nullptr;
    }

    LazyNode getImpl(const HeapLocation& location) const
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value());
            if (iterator != m_abstractHeapStackMap.end()
                && iterator->value->key == location)
                return iterator->value->value;
            return get(m_fallbackStackMap, location);
        }
        default:
            return get(m_heapMap, location);
        }
        return LazyNode();
    }

    static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node)
    {
        auto result = map.add<ImpureDataTranslator>(location);
        if (result.isNewEntry) {
            (*result.iterator)->value = node;
            return nullptr;
        }
        return result.iterator->get();
    }

    static LazyNode get(const Map& map, const HeapLocation& location)
    {
        auto iterator = map.find<ImpureDataTranslator>(location);
        if (iterator != map.end())
            return (*iterator)->value;
        return LazyNode();
    }

    static void clobber(Map& map, AbstractHeap heap)
    {
        map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool {
            return heap.overlaps(slot->key.heap());
        });
    }

    // The majority of impure stack slots are unique per value.
    // This is very useful for fast clobber(): we can just remove the slot addressed by the
    // AbstractHeap in O(1).
    //
    // When there are conflicts, any additional HeapLocation is added to the fallback map.
    // This works well because fallbackStackMap remains tiny.
    //
    // One cannot assume a unique ImpureData is in m_abstractHeapStackMap. It may have been
    // a duplicate in the past and now lives only in m_fallbackStackMap.
    //
    // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value.
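    //
    // Concretely: if a given stack slot (say loc42) has only ever been def()'d through one
    // HeapLocation, that slot lives in m_abstractHeapStackMap keyed by loc42's payload, and
    // clobbering Stack(loc42) is a single hash removal. Only when a second, different
    // HeapLocation shows up for loc42 does it spill into m_fallbackStackMap, which clobber()
    // then has to scan.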
    HashMap<int64_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int64_t>, WTF::SignedWithZeroKeyHashTraits<int64_t>> m_abstractHeapStackMap;
    Map m_fallbackStackMap;

    Map m_heapMap;

#if !defined(NDEBUG)
    HashMap<HeapLocation, LazyNode> m_debugImpureData;
#endif
};

class LocalCSEPhase : public Phase {
public:
    LocalCSEPhase(Graph& graph)
        : Phase(graph, "local common subexpression elimination")
        , m_smallBlock(graph)
        , m_largeBlock(graph)
        , m_hugeBlock(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);

        bool changed = false;

        m_graph.clearReplacements();

        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;

            if (block->size() <= SmallMaps::capacity)
                changed |= m_smallBlock.run(block);
            else if (block->size() <= Options::maxDFGNodesInBasicBlockForPreciseAnalysis())
                changed |= m_largeBlock.run(block);
            else
                changed |= m_hugeBlock.run(block);
        }

        return changed;
    }

private:
    class SmallMaps {
    public:
        // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
        // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
        // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
        // in one of these "small" list-based maps. That number still seems largeish, except that
        // the overhead of HashMaps can be quite high currently: clearing them, or even removing
        // enough things from them, deletes (or resizes) their backing store eagerly. Hence
        // HashMaps induce a lot of malloc traffic.
        static constexpr unsigned capacity = 100;

        SmallMaps()
            : m_pureLength(0)
            , m_impureLength(0)
        {
        }

        void clear()
        {
            m_pureLength = 0;
            m_impureLength = 0;
        }

        void write(AbstractHeap heap)
        {
            if (heap.kind() == SideState)
                return;

            for (unsigned i = 0; i < m_impureLength; ++i) {
                // Unordered removal: overwrite slot i with the last live entry, shrink the map,
                // and then re-examine index i (hence the i--) since it now holds a different pair.
                if (heap.overlaps(m_impureMap[i].key.heap()))
                    m_impureMap[i--] = m_impureMap[--m_impureLength];
            }
        }

        Node* addPure(PureValue value, Node* node)
        {
            for (unsigned i = m_pureLength; i--;) {
                if (m_pureMap[i].key == value)
                    return m_pureMap[i].value;
            }

            RELEASE_ASSERT(m_pureLength < capacity);
            m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
            return nullptr;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            for (unsigned i = m_impureLength; i--;) {
                if (m_impureMap[i].key == location)
                    return m_impureMap[i].value;
            }
            return nullptr;
        }

        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            // FIXME: If we are using small maps, we must not def() derived values.
            // For now the only derived values we def() are constant-based.
            if (location.index() && !location.index().isNode())
                return nullptr;
            if (LazyNode result = findReplacement(location))
                return result;
            RELEASE_ASSERT(m_impureLength < capacity);
            m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
            return nullptr;
        }

    private:
        WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
        WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
        unsigned m_pureLength;
        unsigned m_impureLength;
    };

    class LargeMaps {
    public:
        LargeMaps()
        {
        }

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = false;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    // This is used only for huge basic blocks. Our usual CSE has quadratic complexity in the number of DFG nodes in a basic block.
    // HugeMaps models clobbering conservatively to avoid that O(N^2) algorithm. In particular, we clear all the slots of the specified heap kind
    // in ImpureMap instead of iterating over the slots and removing each matched slot. This change makes the complexity O(N).
    // FIXME: We can make LargeMaps O(N) without introducing conservative behavior if we track clobbering by hierarchical epochs.
    // https://bugs.webkit.org/show_bug.cgi?id=200014
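    // For example, with conservative clobbering a write() to an ordinary (non-Stack) heap simply
    // clears m_heapMap (see ImpureMap::clobber above) rather than scanning every slot for overlap,
    // trading lost CSE opportunities for linear running time on huge blocks.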
    class HugeMaps {
    public:
        HugeMaps() = default;

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = true;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    template<typename Maps>
    class BlockCSE {
    public:
        BlockCSE(Graph& graph)
            : m_graph(graph)
            , m_insertionSet(graph)
        {
        }

        bool run(BasicBlock* block)
        {
            m_maps.clear();
            m_changed = false;
            m_block = block;

            for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
                m_node = block->at(nodeIndex);
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else {
                    // This rule only makes sense for local CSE, since in SSA form we have already
                    // factored the bounds check out of the PutByVal. It's kind of gross, but we
                    // still have reason to believe that PutByValAlias is a good optimization and
                    // that it's better to do it with a single node rather than separating out the
                    // CheckInBounds.
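                    // For example (roughly), a[i]++ becomes a GetByVal followed by a PutByVal on
                    // the same base and index; the GetByVal def()s that heap location, so the
                    // lookup below finds a replacement and downgrades the PutByVal to
                    // PutByValAlias, which does not need to re-check bounds.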
                    if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
                        HeapLocation heap;

                        Node* base = m_graph.varArgChild(m_node, 0).node();
                        Node* index = m_graph.varArgChild(m_node, 1).node();
                        LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result());

                        ArrayMode mode = m_node->arrayMode();
                        switch (mode.type()) {
                        case Array::Int32:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index);
                            break;

                        case Array::Double: {
                            if (!mode.isInBounds())
                                break;
                            LocationKind kind = mode.isInBoundsSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc;
                            heap = HeapLocation(kind, IndexedDoubleProperties, base, index);
                            break;
                        }

                        case Array::Contiguous:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index);
                            break;

                        case Array::Int8Array:
                        case Array::Int16Array:
                        case Array::Int32Array:
                        case Array::Uint8Array:
                        case Array::Uint8ClampedArray:
                        case Array::Uint16Array:
                        case Array::Uint32Array:
                        case Array::Float32Array:
                        case Array::Float64Array:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                indexedPropertyLoc, TypedArrayProperties, base, index);
                            break;

                        default:
                            break;
                        }

                        if (!!heap && m_maps.findReplacement(heap))
                            m_node->setOp(PutByValAlias);
                    }

                    clobberize(m_graph, m_node, *this);
                }
            }

            m_insertionSet.execute(block);

            return m_changed;
        }

        void read(AbstractHeap) { }

        void write(AbstractHeap heap)
        {
            m_maps.write(heap);
        }

        void def(PureValue value)
        {
            Node* match = m_maps.addPure(value, m_node);
            if (!match)
                return;

            m_node->replaceWith(m_graph, match);
            m_changed = true;
        }

        void def(const HeapLocation& location, const LazyNode& value)
        {
            LazyNode match = m_maps.addImpure(location, value);
            if (!match)
                return;

            if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all uses
                //   of this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.

                m_node->child1() = Edge();
                m_graph.dethread();
            }

            if (value.isNode() && value.asNode() == m_node) {
                match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
                ASSERT(match.isNode());
                m_node->replaceWith(m_graph, match.asNode());
                m_changed = true;
            }
        }

    private:
        Graph& m_graph;

        bool m_changed;
        Node* m_node;
        BasicBlock* m_block;

        Maps m_maps;

        InsertionSet m_insertionSet;
    };

    BlockCSE<SmallMaps> m_smallBlock;
    BlockCSE<LargeMaps> m_largeBlock;
    BlockCSE<HugeMaps> m_hugeBlock;
};

class GlobalCSEPhase : public Phase {
public:
    GlobalCSEPhase(Graph& graph)
        : Phase(graph, "global common subexpression elimination")
        , m_impureDataMap(graph)
        , m_insertionSet(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == SSA);

        m_graph.initializeNodeOwners();
        m_graph.ensureSSADominators();

        m_preOrder = m_graph.blocksInPreOrder();

        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
        for (unsigned i = m_preOrder.size(); i--;) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
                addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
        }

        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.

        bool changed = iterate();

        // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
        // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
        // this, we'd have to first reset all of our state.
        // https://bugs.webkit.org/show_bug.cgi?id=145853

        return changed;
    }

    bool iterate()
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog("Performing iteration.\n");

        m_changed = false;
        m_graph.clearReplacements();

        for (unsigned i = 0; i < m_preOrder.size(); ++i) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            m_writesSoFar.clear();

            if (DFGCSEPhaseInternal::verbose)
                dataLog("Processing block ", *m_block, ":\n");

            for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
                m_nodeIndex = nodeIndex;
                m_node = m_block->at(nodeIndex);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Looking at node ", m_node, ":\n");

                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else
                    clobberize(m_graph, m_node, *this);
            }

            m_insertionSet.execute(m_block);

            m_impureData->didVisit = true;
        }

        return m_changed;
    }

    void read(AbstractHeap) { }

    void write(AbstractHeap heap)
    {
        bool clobberConservatively = false;
        m_impureData->availableAtTail.clobber(heap, clobberConservatively);
        m_writesSoFar.add(heap);
    }

    void def(PureValue value)
    {
        // With pure values we do not have to worry about the possibility of some control flow path
        // clobbering the value. So, we just search for all of the like values that have been
        // computed. We pick one that is in a block that dominates ours. Note that this means that
        // a PureValue will map to a list of nodes, since there may be many places in the control
        // flow graph that compute a value but only one of them that dominates us. We may build up
        // a large list of nodes that compute some value in the case of gnarly control flow. This
        // is probably OK.

        auto result = m_pureValues.add(value, Vector<Node*>());
        if (result.isNewEntry) {
            result.iterator->value.append(m_node);
            return;
        }

        for (unsigned i = result.iterator->value.size(); i--;) {
            Node* candidate = result.iterator->value[i];
            if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                m_node->replaceWith(m_graph, candidate);
                m_changed = true;
                return;
            }
        }

        result.iterator->value.append(m_node);
    }

    LazyNode findReplacement(HeapLocation location)
    {
        // At this instant, our "availableAtTail" reflects the set of things that are available in
        // this block so far. We check this map to find block-local CSE opportunities before doing
        // a global search.
        LazyNode match = m_impureData->availableAtTail.get(location);
        if (!!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Found local match: ", match, "\n");
            return match;
        }

        // If it's not available at this point in the block, and at some prior point in the block
        // we have clobbered this heap location, then there is no point in doing a global search.
        if (m_writesSoFar.overlaps(location.heap())) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Not looking globally because of local clobber: ", m_writesSoFar, "\n");
            return nullptr;
        }

        // This performs a backward search over the control flow graph to find some possible
        // non-local def() that matches our heap location. Such a match is only valid if there does
        // not exist any path from that def() to our block that contains a write() that overlaps
        // our heap. This algorithm looks for both of these things (the matching def and the
        // overlapping writes) in one backwards DFS pass.
        //
        // This starts by looking at the starting block's predecessors, and then it continues along
        // their predecessors. As soon as this finds a possible def() - one that defines the heap
        // location we want while dominating our starting block - it assumes that this one must be
        // the match. It then lets the DFS over predecessors complete, but it doesn't add the
        // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
        // heap, it stops, assuming that there is no possible match. Note that the write() case may
        // trigger before we find a def(), or after. Either way, the write() case causes this
        // function to immediately return nullptr.
        //
        // If the write() is found before we find the def(), then we know that any def() we would
        // find would have a path to us that trips over the write() and hence becomes invalid. This
        // is just a direct outcome of us looking for a def() that dominates us. Given a block A
        // that dominates block B - so that A is the one that would have the def() and B is our
        // starting block - we know that any other block must either be on the path from A to B, or
        // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
        // we already have found a block C that has a write(), then C must be on some path from A
        // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
        // def(), stopping on write() is the right thing to do.
        //
        // Stopping on write() is also the right thing to do after we find the def(). After we find
        // the def(), we don't add that block's predecessors to the search worklist. That means
        // that henceforth the only blocks we will see in the search are blocks on the path from
        // the def() to us. If any such block has a write() that clobbers our heap then we should
        // give up.
        //
        // Hence this graph search algorithm ends up being deceptively simple: any overlapping
        // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.

        Vector<BasicBlock*, 8> worklist;
        Vector<BasicBlock*, 8> seenList;
        BitVector seen;

        for (unsigned i = m_block->predecessors.size(); i--;) {
            BasicBlock* predecessor = m_block->predecessors[i];
            if (!seen.get(predecessor->index)) {
                worklist.append(predecessor);
                seen.set(predecessor->index);
            }
        }

        while (!worklist.isEmpty()) {
            BasicBlock* block = worklist.takeLast();
            seenList.append(block);

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Searching in block ", *block, "\n");
            ImpureBlockData& data = m_impureDataMap[block];

            // We require strict domination because this would only see things in our own block if
            // they came *after* our position in the block. Clearly, while our block dominates
            // itself, the things in the block after us don't dominate us.
            if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" It strictly dominates.\n");
                DFG_ASSERT(m_graph, m_node, data.didVisit);
                DFG_ASSERT(m_graph, m_node, !match);
                match = data.availableAtTail.get(location);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Availability: ", match, "\n");
                if (!!match) {
                    // Don't examine the predecessors of a match. At this point we just want to
                    // establish that other blocks on the path from here to there don't clobber
                    // the location we're interested in.
                    continue;
                }
            }

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Dealing with write set ", data.writes, "\n");
            if (data.writes.overlaps(location.heap())) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Clobbered.\n");
                return nullptr;
            }

            for (unsigned i = block->predecessors.size(); i--;) {
                BasicBlock* predecessor = block->predecessors[i];
                if (!seen.get(predecessor->index)) {
                    worklist.append(predecessor);
                    seen.set(predecessor->index);
                }
            }
        }

        if (!match)
            return nullptr;

        // Cache the results for next time. We cache them both for this block and for all of our
        // predecessors, since even though we've already visited our predecessors, our predecessors
        // probably have successors other than us.
        // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
        // the reduction in compile time would warrant the increase in complexity, though.
        // https://bugs.webkit.org/show_bug.cgi?id=134876
        for (BasicBlock* block : seenList)
            m_impureDataMap[block].availableAtTail.add(location, match);
        m_impureData->availableAtTail.add(location, match);

        return match;
    }

    void def(HeapLocation location, LazyNode value)
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got heap location def: ", location, " -> ", value, "\n");

        LazyNode match = findReplacement(location);

        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got match: ", match, "\n");

        if (!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Adding at-tail mapping: ", location, " -> ", value, "\n");
            auto result = m_impureData->availableAtTail.add(location, value);
            ASSERT_UNUSED(result, !result);
            return;
        }

        if (value.isNode() && value.asNode() == m_node) {
            if (!match.isNode()) {
                // We need to properly record the constant in order to use an existing one if applicable.
                // This ensures that re-running GCSE will not find new optimizations.
                match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
                auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
                bool replaced = false;
                if (!result.isNewEntry) {
                    for (unsigned i = result.iterator->value.size(); i--;) {
                        Node* candidate = result.iterator->value[i];
                        if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                            ASSERT(candidate);
                            match->replaceWith(m_graph, candidate);
                            match.setNode(candidate);
                            replaced = true;
                            break;
                        }
                    }
                }
                if (!replaced)
                    result.iterator->value.append(match.asNode());
            }
            ASSERT(match.asNode());
            m_node->replaceWith(m_graph, match.asNode());
            m_changed = true;
        }
    }

    struct ImpureBlockData {
        ImpureBlockData()
            : didVisit(false)
        {
        }

        ClobberSet writes;
        ImpureMap availableAtTail;
        bool didVisit;
    };

    Vector<BasicBlock*> m_preOrder;

    PureMultiMap m_pureValues;
    BlockMap<ImpureBlockData> m_impureDataMap;

    BasicBlock* m_block;
    Node* m_node;
    unsigned m_nodeIndex;
    ImpureBlockData* m_impureData;
    ClobberSet m_writesSoFar;
    InsertionSet m_insertionSet;

    bool m_changed;
};

} // anonymous namespace

bool performLocalCSE(Graph& graph)
{
    return runPhase<LocalCSEPhase>(graph);
}

bool performGlobalCSE(Graph& graph)
{
    return runPhase<GlobalCSEPhase>(graph);
}

} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT)