| 1 | /* |
| 2 | * Copyright (C) 2011-2018 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "DFGCSEPhase.h" |
| 28 | |
| 29 | #if ENABLE(DFG_JIT) |
| 30 | |
| 31 | #include "DFGAbstractHeap.h" |
| 32 | #include "DFGBlockMapInlines.h" |
| 33 | #include "DFGClobberSet.h" |
| 34 | #include "DFGClobberize.h" |
| 35 | #include "DFGDominators.h" |
| 36 | #include "DFGGraph.h" |
| 37 | #include "DFGPhase.h" |
| 38 | #include "JSCInlines.h" |
| 39 | #include <array> |
| 40 | |
| 41 | namespace JSC { namespace DFG { |
| 42 | |
| 43 | // This file contains two CSE implementations: local and global. LocalCSE typically runs when we're |
| 44 | // in DFG mode, i.e., when we want to compile quickly. LocalCSE is heavily optimized to keep |
| 45 | // compile time down. GlobalCSE, on the other hand, is fairly straightforward. It will find more |
| 46 | // optimization opportunities by virtue of being global. |
| 47 | |
| 48 | namespace { |
| 49 | |
| 50 | namespace DFGCSEPhaseInternal { |
| 51 | static const bool verbose = false; |
| 52 | } |
| 53 | |
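| | // A single entry of the impure maps below: a HeapLocation key, the LazyNode that is currently |
| | // known to be available for it, and the key's precomputed hash. Each slot is heap-allocated and |
| | // owned by a unique_ptr, so within one table a given HeapLocation has exactly one slot and |
| | // entries can be compared by pointer identity (see ImpureDataSlotHash below). |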
| 54 | class ImpureDataSlot { |
| 55 | WTF_MAKE_NONCOPYABLE(ImpureDataSlot); |
| 56 | WTF_MAKE_FAST_ALLOCATED; |
| 57 | public: |
| 58 | ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash) |
| 59 | : key(key), value(value), hash(hash) |
| 60 | { } |
| 61 | |
| 62 | HeapLocation key; |
| 63 | LazyNode value; |
| 64 | unsigned hash; |
| 65 | }; |
| 66 | |
| 67 | struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>>::Hash { |
| 68 | static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key) |
| 69 | { |
| 70 | return key->hash; |
| 71 | } |
| 72 | |
| 73 | static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b) |
| 74 | { |
| 75 | // ImpureDataSlots are unique per table per HeapLocation. This lets us compare the key |
| 76 | // by just comparing the pointers of the unique ImpureDataSlots. |
| 77 | ASSERT(a != b || a->key == b->key); |
| 78 | return a == b; |
| 79 | } |
| 80 | }; |
| 81 | |
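| | // HashTranslator that lets the set of unique_ptr<ImpureDataSlot> be probed directly with a |
| | // HeapLocation: lookups hash the location and compare it against slot->key, and a new slot is |
| | // only allocated when an insertion actually takes place. |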
| 82 | struct ImpureDataTranslator { |
| 83 | static unsigned hash(const HeapLocation& key) |
| 84 | { |
| 85 | return key.hash(); |
| 86 | } |
| 87 | |
| 88 | static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key) |
| 89 | { |
| 90 | if (!slot) |
| 91 | return false; |
| 92 | if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot)) |
| 93 | return false; |
| 94 | return slot->key == key; |
| 95 | } |
| 96 | |
| 97 | static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode) |
| 98 | { |
| 99 | new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode}); |
| 100 | } |
| 101 | }; |
| 102 | |
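| | // Tracks which HeapLocations currently have a known LazyNode available. Stack locations are |
| | // split between a map keyed directly by the abstract heap's payload (the common case) and a |
| | // small fallback map; all other heap kinds go through m_heapMap. See the comment above |
| | // m_abstractHeapStackMap for the rationale. In debug builds, a shadow HashMap cross-checks |
| | // every add(), get(), and clobber(). |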
| 103 | class ImpureMap { |
| 104 | WTF_MAKE_FAST_ALLOCATED; |
| 105 | WTF_MAKE_NONCOPYABLE(ImpureMap); |
| 106 | public: |
| 107 | ImpureMap() = default; |
| 108 | |
| 109 | ImpureMap(ImpureMap&& other) |
| 110 | { |
| 111 | m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap); |
| 112 | m_fallbackStackMap.swap(other.m_fallbackStackMap); |
| 113 | m_heapMap.swap(other.m_heapMap); |
| 114 | #if !defined(NDEBUG) |
| 115 | m_debugImpureData.swap(other.m_debugImpureData); |
| 116 | #endif |
| 117 | } |
| 118 | |
| 119 | const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node) |
| 120 | { |
| 121 | const ImpureDataSlot* result = addImpl(location, node); |
| 122 | |
| 123 | #if !defined(NDEBUG) |
| 124 | auto addResult = m_debugImpureData.add(location, node); |
| 125 | ASSERT(!!result == !addResult.isNewEntry); |
| 126 | #endif |
| 127 | return result; |
| 128 | } |
| 129 | |
| 130 | LazyNode get(const HeapLocation& location) const |
| 131 | { |
| 132 | LazyNode result = getImpl(location); |
| 133 | #if !defined(NDEBUG) |
| 134 | ASSERT(result == m_debugImpureData.get(location)); |
| 135 | #endif |
| 136 | return result; |
| 137 | } |
| 138 | |
| 139 | void clobber(AbstractHeap heap) |
| 140 | { |
| 141 | switch (heap.kind()) { |
| 142 | case World: { |
| 143 | clear(); |
| 144 | break; |
| 145 | } |
| 146 | case SideState: |
| 147 | break; |
| 148 | case Stack: { |
| 149 | ASSERT(!heap.payload().isTop()); |
| 150 | ASSERT(heap.payload().value() == heap.payload().value32()); |
| 151 | m_abstractHeapStackMap.remove(heap.payload().value32()); |
| 152 | clobber(m_fallbackStackMap, heap); |
| 153 | break; |
| 154 | } |
| 155 | default: |
| 156 | clobber(m_heapMap, heap); |
| 157 | break; |
| 158 | } |
| 159 | #if !defined(NDEBUG) |
| 160 | m_debugImpureData.removeIf([heap](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool { |
| 161 | return heap.overlaps(pair.key.heap()); |
| 162 | }); |
| 163 | ASSERT(m_debugImpureData.size() |
| 164 | == (m_heapMap.size() |
| 165 | + m_abstractHeapStackMap.size() |
| 166 | + m_fallbackStackMap.size())); |
| 167 | |
| 168 | const bool verifyClobber = false; |
| 169 | if (verifyClobber) { |
| 170 | for (auto& pair : m_debugImpureData) |
| 171 | ASSERT(!!get(pair.key)); |
| 172 | } |
| 173 | #endif |
| 174 | } |
| 175 | |
| 176 | void clear() |
| 177 | { |
| 178 | m_abstractHeapStackMap.clear(); |
| 179 | m_fallbackStackMap.clear(); |
| 180 | m_heapMap.clear(); |
| 181 | #if !defined(NDEBUG) |
| 182 | m_debugImpureData.clear(); |
| 183 | #endif |
| 184 | } |
| 185 | |
| 186 | private: |
| 187 | typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map; |
| 188 | |
| 189 | const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node) |
| 190 | { |
| 191 | switch (location.heap().kind()) { |
| 192 | case World: |
| 193 | case SideState: |
| 194 | RELEASE_ASSERT_NOT_REACHED(); |
| 195 | case Stack: { |
| 196 | AbstractHeap abstractHeap = location.heap(); |
| 197 | if (abstractHeap.payload().isTop()) |
| 198 | return add(m_fallbackStackMap, location, node); |
| 199 | ASSERT(abstractHeap.payload().value() == abstractHeap.payload().value32()); |
| 200 | auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value32(), nullptr); |
| 201 | if (addResult.isNewEntry) { |
| 202 | addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0}); |
| 203 | return nullptr; |
| 204 | } |
| 205 | if (addResult.iterator->value->key == location) |
| 206 | return addResult.iterator->value.get(); |
| 207 | return add(m_fallbackStackMap, location, node); |
| 208 | } |
| 209 | default: |
| 210 | return add(m_heapMap, location, node); |
| 211 | } |
| 212 | return nullptr; |
| 213 | } |
| 214 | |
| 215 | LazyNode getImpl(const HeapLocation& location) const |
| 216 | { |
| 217 | switch (location.heap().kind()) { |
| 218 | case World: |
| 219 | case SideState: |
| 220 | RELEASE_ASSERT_NOT_REACHED(); |
| 221 | case Stack: { |
| 222 | ASSERT(location.heap().payload().value() == location.heap().payload().value32()); |
| 223 | auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value32()); |
| 224 | if (iterator != m_abstractHeapStackMap.end() |
| 225 | && iterator->value->key == location) |
| 226 | return iterator->value->value; |
| 227 | return get(m_fallbackStackMap, location); |
| 228 | } |
| 229 | default: |
| 230 | return get(m_heapMap, location); |
| 231 | } |
| 232 | return LazyNode(); |
| 233 | } |
| 234 | |
| 235 | static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node) |
| 236 | { |
| 237 | auto result = map.add<ImpureDataTranslator>(location); |
| 238 | if (result.isNewEntry) { |
| 239 | (*result.iterator)->value = node; |
| 240 | return nullptr; |
| 241 | } |
| 242 | return result.iterator->get(); |
| 243 | } |
| 244 | |
| 245 | static LazyNode get(const Map& map, const HeapLocation& location) |
| 246 | { |
| 247 | auto iterator = map.find<ImpureDataTranslator>(location); |
| 248 | if (iterator != map.end()) |
| 249 | return (*iterator)->value; |
| 250 | return LazyNode(); |
| 251 | } |
| 252 | |
| 253 | static void clobber(Map& map, AbstractHeap heap) |
| 254 | { |
| 255 | map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool { |
| 256 | return heap.overlaps(slot->key.heap()); |
| 257 | }); |
| 258 | } |
| 259 | |
| 260 | // The majority of Impure Stack Slots are unique per value. |
| 261 | // This is very useful for a fast clobber(): we can just remove the slot addressed by the AbstractHeap |
| 262 | // in O(1). |
| 263 | // |
| 264 | // When there is a conflict, any additional HeapLocation is added to the fallback map. |
| 265 | // This works well because fallbackStackMap remains tiny. |
| 266 | // |
| 267 | // One cannot assume a unique ImpureData is in m_abstractHeapStackMap. It may have been |
| 268 | // a duplicate in the past and now only live in m_fallbackStackMap. |
| 269 | // |
| 270 | // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value. |
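| | // |
| | // Illustrative example: clobbering an abstract heap Stack(42) just removes the |
| | // m_abstractHeapStackMap entry keyed by 42 in O(1) and then filters the (usually tiny) |
| | // m_fallbackStackMap, instead of scanning every known HeapLocation. |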
| 271 | HashMap<int32_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int32_t>::Hash, WTF::SignedWithZeroKeyHashTraits<int32_t>> m_abstractHeapStackMap; |
| 272 | Map m_fallbackStackMap; |
| 273 | |
| 274 | Map m_heapMap; |
| 275 | |
| 276 | #if !defined(NDEBUG) |
| 277 | HashMap<HeapLocation, LazyNode> m_debugImpureData; |
| 278 | #endif |
| 279 | }; |
| 280 | |
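| | // Block-local CSE over ThreadedCPS or LoadStore form. Each block is processed independently, |
| | // using the array-backed SmallMaps for blocks of at most SmallMaps::capacity nodes and the |
| | // hash-based LargeMaps otherwise. |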
| 281 | class LocalCSEPhase : public Phase { |
| 282 | public: |
| 283 | LocalCSEPhase(Graph& graph) |
| 284 | : Phase(graph, "local common subexpression elimination" ) |
| 285 | , m_smallBlock(graph) |
| 286 | , m_largeBlock(graph) |
| 287 | { |
| 288 | } |
| 289 | |
| 290 | bool run() |
| 291 | { |
| 292 | ASSERT(m_graph.m_fixpointState == FixpointNotConverged); |
| 293 | ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore); |
| 294 | |
| 295 | bool changed = false; |
| 296 | |
| 297 | m_graph.clearReplacements(); |
| 298 | |
| 299 | for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) { |
| 300 | BasicBlock* block = m_graph.block(blockIndex); |
| 301 | if (!block) |
| 302 | continue; |
| 303 | |
| 304 | if (block->size() <= SmallMaps::capacity) |
| 305 | changed |= m_smallBlock.run(block); |
| 306 | else |
| 307 | changed |= m_largeBlock.run(block); |
| 308 | } |
| 309 | |
| 310 | return changed; |
| 311 | } |
| 312 | |
| 313 | private: |
| 314 | class SmallMaps { |
| 315 | public: |
| 316 | // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice, |
| 317 | // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs. |
| 318 | // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up |
| 319 | // in one of these "small" list-based maps. That number still seems largeish, except that |
| 320 | // the overhead of HashMaps can be quite high currently: clearing them, or even removing |
| 321 | // enough things from them, deletes (or resizes) their backing store eagerly. Hence |
| 322 | // HashMaps induce a lot of malloc traffic. |
| 323 | static const unsigned capacity = 100; |
| 324 | |
| 325 | SmallMaps() |
| 326 | : m_pureLength(0) |
| 327 | , m_impureLength(0) |
| 328 | { |
| 329 | } |
| 330 | |
| 331 | void clear() |
| 332 | { |
| 333 | m_pureLength = 0; |
| 334 | m_impureLength = 0; |
| 335 | } |
| 336 | |
| 337 | void write(AbstractHeap heap) |
| 338 | { |
| 339 | if (heap.kind() == SideState) |
| 340 | return; |
| 341 | |
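| | // Remove every overlapping entry by overwriting it with the last live entry and shrinking |
| | // the map; decrementing i makes the loop revisit the entry that was swapped in. |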
| 342 | for (unsigned i = 0; i < m_impureLength; ++i) { |
| 343 | if (heap.overlaps(m_impureMap[i].key.heap())) |
| 344 | m_impureMap[i--] = m_impureMap[--m_impureLength]; |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | Node* addPure(PureValue value, Node* node) |
| 349 | { |
| 350 | for (unsigned i = m_pureLength; i--;) { |
| 351 | if (m_pureMap[i].key == value) |
| 352 | return m_pureMap[i].value; |
| 353 | } |
| 354 | |
| 355 | ASSERT(m_pureLength < capacity); |
| 356 | m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node); |
| 357 | return nullptr; |
| 358 | } |
| 359 | |
| 360 | LazyNode findReplacement(HeapLocation location) |
| 361 | { |
| 362 | for (unsigned i = m_impureLength; i--;) { |
| 363 | if (m_impureMap[i].key == location) |
| 364 | return m_impureMap[i].value; |
| 365 | } |
| 366 | return nullptr; |
| 367 | } |
| 368 | |
| 369 | LazyNode addImpure(HeapLocation location, LazyNode node) |
| 370 | { |
| 371 | // FIXME: If we are using small maps, we must not def() derived values. |
| 372 | // For now the only derived values we def() are constant-based. |
| 373 | if (location.index() && !location.index().isNode()) |
| 374 | return nullptr; |
| 375 | if (LazyNode result = findReplacement(location)) |
| 376 | return result; |
| 377 | ASSERT(m_impureLength < capacity); |
| 378 | m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node); |
| 379 | return nullptr; |
| 380 | } |
| 381 | |
| 382 | private: |
| 383 | WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity]; |
| 384 | WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity]; |
| 385 | unsigned m_pureLength; |
| 386 | unsigned m_impureLength; |
| 387 | }; |
| 388 | |
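| | // Hash-map-backed equivalent of SmallMaps, used for blocks with more than |
| | // SmallMaps::capacity nodes. |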
| 389 | class LargeMaps { |
| 390 | public: |
| 391 | LargeMaps() |
| 392 | { |
| 393 | } |
| 394 | |
| 395 | void clear() |
| 396 | { |
| 397 | m_pureMap.clear(); |
| 398 | m_impureMap.clear(); |
| 399 | } |
| 400 | |
| 401 | void write(AbstractHeap heap) |
| 402 | { |
| 403 | m_impureMap.clobber(heap); |
| 404 | } |
| 405 | |
| 406 | Node* addPure(PureValue value, Node* node) |
| 407 | { |
| 408 | auto result = m_pureMap.add(value, node); |
| 409 | if (result.isNewEntry) |
| 410 | return nullptr; |
| 411 | return result.iterator->value; |
| 412 | } |
| 413 | |
| 414 | LazyNode findReplacement(HeapLocation location) |
| 415 | { |
| 416 | return m_impureMap.get(location); |
| 417 | } |
| 418 | |
| 419 | LazyNode addImpure(const HeapLocation& location, const LazyNode& node) |
| 420 | { |
| 421 | if (const ImpureDataSlot* slot = m_impureMap.add(location, node)) |
| 422 | return slot->value; |
| 423 | return LazyNode(); |
| 424 | } |
| 425 | |
| 426 | private: |
| 427 | HashMap<PureValue, Node*> m_pureMap; |
| 428 | ImpureMap m_impureMap; |
| 429 | }; |
| 430 | |
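| | // Runs CSE over a single basic block, parameterized on the map implementation (SmallMaps or |
| | // LargeMaps). It acts as the functor for clobberize(), which reports each node's effects |
| | // through read(), write(), and def() below. |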
| 431 | template<typename Maps> |
| 432 | class BlockCSE { |
| 433 | public: |
| 434 | BlockCSE(Graph& graph) |
| 435 | : m_graph(graph) |
| 436 | , m_insertionSet(graph) |
| 437 | { |
| 438 | } |
| 439 | |
| 440 | bool run(BasicBlock* block) |
| 441 | { |
| 442 | m_maps.clear(); |
| 443 | m_changed = false; |
| 444 | m_block = block; |
| 445 | |
| 446 | for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) { |
| 447 | m_node = block->at(nodeIndex); |
| 448 | m_graph.performSubstitution(m_node); |
| 449 | |
| 450 | if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) { |
| 451 | m_node->replaceWith(m_graph, m_node->child1().node()); |
| 452 | m_changed = true; |
| 453 | } else { |
| 454 | // This rule only makes sense for local CSE, since in SSA form we have already |
| 455 | // factored the bounds check out of the PutByVal. It's kind of gross, but we |
| 456 | // still have reason to believe that PutByValAlias is a good optimization and |
| 457 | // that it's better to do it with a single node rather than separating out the |
| 458 | // CheckInBounds. |
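| | // |
| | // Illustrative example (assuming the same base and index and an in-bounds array mode): |
| | //     a: PutByVal(@base, @index, @value1) |
| | //     b: PutByVal(@base, @index, @value2) |
| | // Visiting node a def()s the indexed property HeapLocation, so when node b builds the same |
| | // heap location below and finds that replacement, its op is changed to PutByValAlias. |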
| 459 | if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) { |
| 460 | HeapLocation heap; |
| 461 | |
| 462 | Node* base = m_graph.varArgChild(m_node, 0).node(); |
| 463 | Node* index = m_graph.varArgChild(m_node, 1).node(); |
| 464 | LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result()); |
| 465 | |
| 466 | ArrayMode mode = m_node->arrayMode(); |
| 467 | switch (mode.type()) { |
| 468 | case Array::Int32: |
| 469 | if (!mode.isInBounds()) |
| 470 | break; |
| 471 | heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index); |
| 472 | break; |
| 473 | |
| 474 | case Array::Double: { |
| 475 | if (!mode.isInBounds()) |
| 476 | break; |
| 477 | LocationKind kind = mode.isSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc; |
| 478 | heap = HeapLocation(kind, IndexedDoubleProperties, base, index); |
| 479 | break; |
| 480 | } |
| 481 | |
| 482 | case Array::Contiguous: |
| 483 | if (!mode.isInBounds()) |
| 484 | break; |
| 485 | heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index); |
| 486 | break; |
| 487 | |
| 488 | case Array::Int8Array: |
| 489 | case Array::Int16Array: |
| 490 | case Array::Int32Array: |
| 491 | case Array::Uint8Array: |
| 492 | case Array::Uint8ClampedArray: |
| 493 | case Array::Uint16Array: |
| 494 | case Array::Uint32Array: |
| 495 | case Array::Float32Array: |
| 496 | case Array::Float64Array: |
| 497 | if (!mode.isInBounds()) |
| 498 | break; |
| 499 | heap = HeapLocation( |
| 500 | indexedPropertyLoc, TypedArrayProperties, base, index); |
| 501 | break; |
| 502 | |
| 503 | default: |
| 504 | break; |
| 505 | } |
| 506 | |
| 507 | if (!!heap && m_maps.findReplacement(heap)) |
| 508 | m_node->setOp(PutByValAlias); |
| 509 | } |
| 510 | |
| 511 | clobberize(m_graph, m_node, *this); |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | m_insertionSet.execute(block); |
| 516 | |
| 517 | return m_changed; |
| 518 | } |
| 519 | |
| 520 | void read(AbstractHeap) { } |
| 521 | |
| 522 | void write(AbstractHeap heap) |
| 523 | { |
| 524 | m_maps.write(heap); |
| 525 | } |
| 526 | |
| 527 | void def(PureValue value) |
| 528 | { |
| 529 | Node* match = m_maps.addPure(value, m_node); |
| 530 | if (!match) |
| 531 | return; |
| 532 | |
| 533 | m_node->replaceWith(m_graph, match); |
| 534 | m_changed = true; |
| 535 | } |
| 536 | |
| 537 | void def(const HeapLocation& location, const LazyNode& value) |
| 538 | { |
| 539 | LazyNode match = m_maps.addImpure(location, value); |
| 540 | if (!match) |
| 541 | return; |
| 542 | |
| 543 | if (m_node->op() == GetLocal) { |
| 544 | // Usually the CPS rethreading phase does this. But it's OK for us to mess with |
| 545 | // locals so long as: |
| 546 | // |
| 547 | // - We dethread the graph. Any changes we make may invalidate the assumptions of |
| 548 | // our CPS form, particularly if this GetLocal is linked to the variablesAtTail. |
| 549 | // |
| 550 | // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be |
| 551 | // totally wrong but it would pessimize the code. Just because there is a |
| 552 | // GetLocal doesn't mean that the child was live. Simply rerouting all uses |
| 553 | // of this GetLocal will preserve the live-at-exit information just fine. |
| 554 | // |
| 555 | // We accomplish the latter by just clearing the child; then the Phantom that we |
| 556 | // introduce won't have children and so it will eventually just be deleted. |
| 557 | |
| 558 | m_node->child1() = Edge(); |
| 559 | m_graph.dethread(); |
| 560 | } |
| 561 | |
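| | // We only replace m_node when the value being def()'ed is this node's own result. A def() |
| | // of some other value (e.g. the value stored by a PutByVal) only feeds the map and never |
| | // triggers a replacement here. |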
| 562 | if (value.isNode() && value.asNode() == m_node) { |
| 563 | match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block; |
| 564 | ASSERT(match.isNode()); |
| 565 | m_node->replaceWith(m_graph, match.asNode()); |
| 566 | m_changed = true; |
| 567 | } |
| 568 | } |
| 569 | |
| 570 | private: |
| 571 | Graph& m_graph; |
| 572 | |
| 573 | bool m_changed; |
| 574 | Node* m_node; |
| 575 | BasicBlock* m_block; |
| 576 | |
| 577 | Maps m_maps; |
| 578 | |
| 579 | InsertionSet m_insertionSet; |
| 580 | }; |
| 581 | |
| 582 | BlockCSE<SmallMaps> m_smallBlock; |
| 583 | BlockCSE<LargeMaps> m_largeBlock; |
| 584 | }; |
| 585 | |
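| | // Dominator-based global CSE over SSA form. Pure values are matched against any previously |
| | // seen node whose block dominates the current one. Impure heap locations are matched first |
| | // against this block's availableAtTail, and otherwise via the backward search over |
| | // predecessors described in findReplacement(). |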
| 586 | class GlobalCSEPhase : public Phase { |
| 587 | public: |
| 588 | GlobalCSEPhase(Graph& graph) |
| 589 | : Phase(graph, "global common subexpression elimination" ) |
| 590 | , m_impureDataMap(graph) |
| 591 | , m_insertionSet(graph) |
| 592 | { |
| 593 | } |
| 594 | |
| 595 | bool run() |
| 596 | { |
| 597 | ASSERT(m_graph.m_fixpointState == FixpointNotConverged); |
| 598 | ASSERT(m_graph.m_form == SSA); |
| 599 | |
| 600 | m_graph.initializeNodeOwners(); |
| 601 | m_graph.ensureSSADominators(); |
| 602 | |
| 603 | m_preOrder = m_graph.blocksInPreOrder(); |
| 604 | |
| 605 | // First figure out what gets clobbered by blocks. Note that this uses the preOrder list |
| 606 | // for convenience only. |
| 607 | for (unsigned i = m_preOrder.size(); i--;) { |
| 608 | m_block = m_preOrder[i]; |
| 609 | m_impureData = &m_impureDataMap[m_block]; |
| 610 | for (unsigned nodeIndex = m_block->size(); nodeIndex--;) |
| 611 | addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes); |
| 612 | } |
| 613 | |
| 614 | // Based on my experience doing this before, what follows might have to be made iterative. |
| 615 | // Right now it doesn't have to be iterative because everything is dominator-based. But when |
| 616 | // validation is enabled, we check if iterating would find new CSE opportunities. |
| 617 | |
| 618 | bool changed = iterate(); |
| 619 | |
| 620 | // FIXME: It should be possible to assert that CSE will not find any new opportunities if you |
| 621 | // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did |
| 622 | // this, we'd have to first reset all of our state. |
| 623 | // https://bugs.webkit.org/show_bug.cgi?id=145853 |
| 624 | |
| 625 | return changed; |
| 626 | } |
| 627 | |
| 628 | bool iterate() |
| 629 | { |
| 630 | if (DFGCSEPhaseInternal::verbose) |
| 631 | dataLog("Performing iteration.\n" ); |
| 632 | |
| 633 | m_changed = false; |
| 634 | m_graph.clearReplacements(); |
| 635 | |
| 636 | for (unsigned i = 0; i < m_preOrder.size(); ++i) { |
| 637 | m_block = m_preOrder[i]; |
| 638 | m_impureData = &m_impureDataMap[m_block]; |
| 639 | m_writesSoFar.clear(); |
| 640 | |
| 641 | if (DFGCSEPhaseInternal::verbose) |
| 642 | dataLog("Processing block " , *m_block, ":\n" ); |
| 643 | |
| 644 | for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) { |
| 645 | m_nodeIndex = nodeIndex; |
| 646 | m_node = m_block->at(nodeIndex); |
| 647 | if (DFGCSEPhaseInternal::verbose) |
| 648 | dataLog(" Looking at node " , m_node, ":\n" ); |
| 649 | |
| 650 | m_graph.performSubstitution(m_node); |
| 651 | |
| 652 | if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) { |
| 653 | m_node->replaceWith(m_graph, m_node->child1().node()); |
| 654 | m_changed = true; |
| 655 | } else |
| 656 | clobberize(m_graph, m_node, *this); |
| 657 | } |
| 658 | |
| 659 | m_insertionSet.execute(m_block); |
| 660 | |
| 661 | m_impureData->didVisit = true; |
| 662 | } |
| 663 | |
| 664 | return m_changed; |
| 665 | } |
| 666 | |
| 667 | void read(AbstractHeap) { } |
| 668 | |
| 669 | void write(AbstractHeap heap) |
| 670 | { |
| 671 | m_impureData->availableAtTail.clobber(heap); |
| 672 | m_writesSoFar.add(heap); |
| 673 | } |
| 674 | |
| 675 | void def(PureValue value) |
| 676 | { |
| 677 | // With pure values we do not have to worry about the possibility of some control flow path |
| 678 | // clobbering the value. So, we just search for all of the like values that have been |
| 679 | // computed. We pick one that is in a block that dominates ours. Note that this means that |
| 680 | // a PureValue will map to a list of nodes, since there may be many places in the control |
| 681 | // flow graph that compute a value but only one of them that dominates us. We may build up |
| 682 | // a large list of nodes that compute some value in the case of gnarly control flow. This |
| 683 | // is probably OK. |
| 684 | |
| 685 | auto result = m_pureValues.add(value, Vector<Node*>()); |
| 686 | if (result.isNewEntry) { |
| 687 | result.iterator->value.append(m_node); |
| 688 | return; |
| 689 | } |
| 690 | |
| 691 | for (unsigned i = result.iterator->value.size(); i--;) { |
| 692 | Node* candidate = result.iterator->value[i]; |
| 693 | if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) { |
| 694 | m_node->replaceWith(m_graph, candidate); |
| 695 | m_changed = true; |
| 696 | return; |
| 697 | } |
| 698 | } |
| 699 | |
| 700 | result.iterator->value.append(m_node); |
| 701 | } |
| 702 | |
| 703 | LazyNode findReplacement(HeapLocation location) |
| 704 | { |
| 705 | // At this instant, our "availableAtTail" reflects the set of things that are available in |
| 706 | // this block so far. We check this map to find block-local CSE opportunities before doing |
| 707 | // a global search. |
| 708 | LazyNode match = m_impureData->availableAtTail.get(location); |
| 709 | if (!!match) { |
| 710 | if (DFGCSEPhaseInternal::verbose) |
| 711 | dataLog(" Found local match: " , match, "\n" ); |
| 712 | return match; |
| 713 | } |
| 714 | |
| 715 | // If it's not available at this point in the block, and at some prior point in the block |
| 716 | // we have clobbered this heap location, then there is no point in doing a global search. |
| 717 | if (m_writesSoFar.overlaps(location.heap())) { |
| 718 | if (DFGCSEPhaseInternal::verbose) |
| 719 | dataLog(" Not looking globally because of local clobber: " , m_writesSoFar, "\n" ); |
| 720 | return nullptr; |
| 721 | } |
| 722 | |
| 723 | // This performs a backward search over the control flow graph to find some possible |
| 724 | // non-local def() that matches our heap location. Such a match is only valid if there does |
| 725 | // not exist any path from that def() to our block that contains a write() that overlaps |
| 726 | // our heap. This algorithm looks for both of these things (the matching def and the |
| 727 | // overlapping writes) in one backwards DFS pass. |
| 728 | // |
| 729 | // This starts by looking at the starting block's predecessors, and then it continues along |
| 730 | // their predecessors. As soon as this finds a possible def() - one that defines the heap |
| 731 | // location we want while dominating our starting block - it assumes that this one must be |
| 732 | // the match. It then lets the DFS over predecessors complete, but it doesn't add the |
| 733 | // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path |
| 734 | // from the def() to us. As soon as the DFS finds a write() that overlaps the location's |
| 735 | // heap, it stops, assuming that there is no possible match. Note that the write() case may |
| 736 | // trigger before we find a def(), or after. Either way, the write() case causes this |
| 737 | // function to immediately return nullptr. |
| 738 | // |
| 739 | // If the write() is found before we find the def(), then we know that any def() we would |
| 740 | // find would have a path to us that trips over the write() and hence becomes invalid. This |
| 741 | // is just a direct outcome of us looking for a def() that dominates us. Given a block A |
| 742 | // that dominates block B - so that A is the one that would have the def() and B is our |
| 743 | // starting block - we know that any other block must either be on the path from A to B, or |
| 744 | // it must be on a path from the root to A, but not both. So, if we haven't found A yet but |
| 745 | // we already have found a block C that has a write(), then C must be on some path from A |
| 746 | // to B, which means that A's def() is invalid for our purposes. Hence, before we find the |
| 747 | // def(), stopping on write() is the right thing to do. |
| 748 | // |
| 749 | // Stopping on write() is also the right thing to do after we find the def(). After we find |
| 750 | // the def(), we don't add that block's predecessors to the search worklist. That means |
| 751 | // that henceforth the only blocks we will see in the search are blocks on the path from |
| 752 | // the def() to us. If any such block has a write() that clobbers our heap then we should |
| 753 | // give up. |
| 754 | // |
| 755 | // Hence this graph search algorithm ends up being deceptively simple: any overlapping |
| 756 | // write() causes us to immediately return nullptr, and a matching def() means that we just |
| 757 | // record it and neglect to visit its predecessors. |
| 758 | |
| 759 | Vector<BasicBlock*, 8> worklist; |
| 760 | Vector<BasicBlock*, 8> seenList; |
| 761 | BitVector seen; |
| 762 | |
| 763 | for (unsigned i = m_block->predecessors.size(); i--;) { |
| 764 | BasicBlock* predecessor = m_block->predecessors[i]; |
| 765 | if (!seen.get(predecessor->index)) { |
| 766 | worklist.append(predecessor); |
| 767 | seen.set(predecessor->index); |
| 768 | } |
| 769 | } |
| 770 | |
| 771 | while (!worklist.isEmpty()) { |
| 772 | BasicBlock* block = worklist.takeLast(); |
| 773 | seenList.append(block); |
| 774 | |
| 775 | if (DFGCSEPhaseInternal::verbose) |
| 776 | dataLog(" Searching in block " , *block, "\n" ); |
| 777 | ImpureBlockData& data = m_impureDataMap[block]; |
| 778 | |
| 779 | // We require strict domination because this would only see things in our own block if |
| 780 | // they came *after* our position in the block. Clearly, while our block dominates |
| 781 | // itself, the things in the block after us don't dominate us. |
| 782 | if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) { |
| 783 | if (DFGCSEPhaseInternal::verbose) |
| 784 | dataLog(" It strictly dominates.\n" ); |
| 785 | DFG_ASSERT(m_graph, m_node, data.didVisit); |
| 786 | DFG_ASSERT(m_graph, m_node, !match); |
| 787 | match = data.availableAtTail.get(location); |
| 788 | if (DFGCSEPhaseInternal::verbose) |
| 789 | dataLog(" Availability: " , match, "\n" ); |
| 790 | if (!!match) { |
| 791 | // Don't examine the predecessors of a match. At this point we just want to |
| 792 | // establish that other blocks on the path from here to there don't clobber |
| 793 | // the location we're interested in. |
| 794 | continue; |
| 795 | } |
| 796 | } |
| 797 | |
| 798 | if (DFGCSEPhaseInternal::verbose) |
| 799 | dataLog(" Dealing with write set " , data.writes, "\n" ); |
| 800 | if (data.writes.overlaps(location.heap())) { |
| 801 | if (DFGCSEPhaseInternal::verbose) |
| 802 | dataLog(" Clobbered.\n" ); |
| 803 | return nullptr; |
| 804 | } |
| 805 | |
| 806 | for (unsigned i = block->predecessors.size(); i--;) { |
| 807 | BasicBlock* predecessor = block->predecessors[i]; |
| 808 | if (!seen.get(predecessor->index)) { |
| 809 | worklist.append(predecessor); |
| 810 | seen.set(predecessor->index); |
| 811 | } |
| 812 | } |
| 813 | } |
| 814 | |
| 815 | if (!match) |
| 816 | return nullptr; |
| 817 | |
| 818 | // Cache the results for next time. We cache them both for this block and for all of our |
| 819 | // predecessors, since even though we've already visited our predecessors, our predecessors |
| 820 | // probably have successors other than us. |
| 821 | // FIXME: Consider caching failed searches as well, when match is null. It's not clear that |
| 822 | // the reduction in compile time would warrant the increase in complexity, though. |
| 823 | // https://bugs.webkit.org/show_bug.cgi?id=134876 |
| 824 | for (BasicBlock* block : seenList) |
| 825 | m_impureDataMap[block].availableAtTail.add(location, match); |
| 826 | m_impureData->availableAtTail.add(location, match); |
| 827 | |
| 828 | return match; |
| 829 | } |
| 830 | |
| 831 | void def(HeapLocation location, LazyNode value) |
| 832 | { |
| 833 | if (DFGCSEPhaseInternal::verbose) |
| 834 | dataLog(" Got heap location def: " , location, " -> " , value, "\n" ); |
| 835 | |
| 836 | LazyNode match = findReplacement(location); |
| 837 | |
| 838 | if (DFGCSEPhaseInternal::verbose) |
| 839 | dataLog(" Got match: " , match, "\n" ); |
| 840 | |
| 841 | if (!match) { |
| 842 | if (DFGCSEPhaseInternal::verbose) |
| 843 | dataLog(" Adding at-tail mapping: " , location, " -> " , value, "\n" ); |
| 844 | auto result = m_impureData->availableAtTail.add(location, value); |
| 845 | ASSERT_UNUSED(result, !result); |
| 846 | return; |
| 847 | } |
| 848 | |
| 849 | if (value.isNode() && value.asNode() == m_node) { |
| 850 | if (!match.isNode()) { |
| 851 | // We need to properly record the constant in order to use an existing one if applicable. |
| 852 | // This ensures that re-running GCSE will not find new optimizations. |
| 853 | match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block; |
| 854 | auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>()); |
| 855 | bool replaced = false; |
| 856 | if (!result.isNewEntry) { |
| 857 | for (unsigned i = result.iterator->value.size(); i--;) { |
| 858 | Node* candidate = result.iterator->value[i]; |
| 859 | if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) { |
| 860 | ASSERT(candidate); |
| 861 | match->replaceWith(m_graph, candidate); |
| 862 | match.setNode(candidate); |
| 863 | replaced = true; |
| 864 | break; |
| 865 | } |
| 866 | } |
| 867 | } |
| 868 | if (!replaced) |
| 869 | result.iterator->value.append(match.asNode()); |
| 870 | } |
| 871 | ASSERT(match.asNode()); |
| 872 | m_node->replaceWith(m_graph, match.asNode()); |
| 873 | m_changed = true; |
| 874 | } |
| 875 | } |
| 876 | |
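| | // Per-block state for the global pass: "writes" summarizes every abstract heap the block |
| | // clobbers (computed up front in run()), "availableAtTail" holds the heap locations known to |
| | // be available at the end of the block, and "didVisit" records whether iterate() has |
| | // processed the block yet. |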
| 877 | struct ImpureBlockData { |
| 878 | ImpureBlockData() |
| 879 | : didVisit(false) |
| 880 | { |
| 881 | } |
| 882 | |
| 883 | ClobberSet writes; |
| 884 | ImpureMap availableAtTail; |
| 885 | bool didVisit; |
| 886 | }; |
| 887 | |
| 888 | Vector<BasicBlock*> m_preOrder; |
| 889 | |
| 890 | PureMultiMap m_pureValues; |
| 891 | BlockMap<ImpureBlockData> m_impureDataMap; |
| 892 | |
| 893 | BasicBlock* m_block; |
| 894 | Node* m_node; |
| 895 | unsigned m_nodeIndex; |
| 896 | ImpureBlockData* m_impureData; |
| 897 | ClobberSet m_writesSoFar; |
| 898 | InsertionSet m_insertionSet; |
| 899 | |
| 900 | bool m_changed; |
| 901 | }; |
| 902 | |
| 903 | } // anonymous namespace |
| 904 | |
| 905 | bool performLocalCSE(Graph& graph) |
| 906 | { |
| 907 | return runPhase<LocalCSEPhase>(graph); |
| 908 | } |
| 909 | |
| 910 | bool performGlobalCSE(Graph& graph) |
| 911 | { |
| 912 | return runPhase<GlobalCSEPhase>(graph); |
| 913 | } |
| 914 | |
| 915 | } } // namespace JSC::DFG |
| 916 | |
| 917 | #endif // ENABLE(DFG_JIT) |
| 918 | |