AirAllocateRegistersByGraphColoring.cpp source code [webkit/Source/JavaScriptCore/b3/air/AirAllocateRegistersByGraphColoring.cpp]

1	/*
2	* Copyright (C) 2015-2017 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "AirAllocateRegistersByGraphColoring.h"
28
29	#if ENABLE(B3_JIT)
30
31	#include "AirCode.h"
32	#include "AirFixSpillsAfterTerminals.h"
33	#include "AirInsertionSet.h"
34	#include "AirInstInlines.h"
35	#include "AirLiveness.h"
36	#include "AirPadInterference.h"
37	#include "AirPhaseScope.h"
38	#include "AirTmpInlines.h"
39	#include "AirTmpWidth.h"
40	#include "AirUseCounts.h"
41	#include <wtf/ListDump.h>
42
43	namespace JSC { namespace B3 { namespace Air {
44
45	namespace {
46
47	bool debug = false;
48	bool traceDebug = false;
49	bool reportStats = false;
50
51	// The AbstractColoringAllocator defines all the code that is independant
52	// from the bank or register and can be shared when allocating registers.
53	template<typename IndexType, typename TmpMapper>
54	class AbstractColoringAllocator {
55	public:
56	AbstractColoringAllocator(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts)
57	: m_regsInPriorityOrder (regsInPriorityOrder)
58	, m_lastPrecoloredRegisterIndex(lastPrecoloredRegisterIndex)
59	, m_unspillableTmps(unspillableTmps)
60	, m_useCounts(useCounts)
61	, m_code(code)
62	{
63	initializeDegrees(tmpArraySize);
64
65	m_adjacencyList.resize(tmpArraySize);
66	m_moveList.resize(tmpArraySize);
67	m_coalescedTmps.fill(`0`, tmpArraySize);
68	m_isOnSelectStack.ensureSize(tmpArraySize);
69	m_spillWorklist.ensureSize(tmpArraySize);
70	}
71
72	protected:
73
74	unsigned registerCount() const { return m_regsInPriorityOrder.size(); }
75
76	IndexType getAlias(IndexType tmpIndex) const
77	{
78	IndexType alias = tmpIndex;
79	while (IndexType nextAlias = m_coalescedTmps[alias])
80	alias = nextAlias;
81	return alias;
82	}
83
84	void addEdge(IndexType a, IndexType b)
85	{
86	if (a == b)
87	return;
88	addEdgeDistinct(a, b);
89	}
90
91	void addToSpill(unsigned toSpill)
92	{
93	if (m_unspillableTmps.contains(toSpill))
94	return;
95
96	m_spillWorklist.add(toSpill);
97	}
98
99	bool isPrecolored(IndexType tmpIndex)
100	{
101	return tmpIndex <= m_lastPrecoloredRegisterIndex;
102	}
103
104	void initializeDegrees(unsigned tmpArraySize)
105	{
106	m_degrees.resize(tmpArraySize);
107
108	// All precolored registers have an "infinite" degree.
109	unsigned firstNonRegIndex = m_lastPrecoloredRegisterIndex + `1`;
110	for (unsigned i = `0`; i < firstNonRegIndex; ++i)
111	m_degrees[i] = std::numeric_limits<unsigned>::max();
112
113	memset(m_degrees.data() + firstNonRegIndex, `0`, (tmpArraySize - firstNonRegIndex) * sizeof(unsigned));
114	}
115
116	void addEdgeDistinct(IndexType a, IndexType b)
117	{
118	ASSERT(a != b);
119	bool isNewEdge = addInterferenceEdge(InterferenceEdge(a, b));
120	if (isNewEdge) {
121	if (!isPrecolored(a)) {
122	ASSERT(!m_adjacencyList[a].contains(b));
123	m_adjacencyList[a].append(b);
124	m_degrees[a]++;
125	}
126
127	if (!isPrecolored(b)) {
128	ASSERT(!m_adjacencyList[b].contains(a));
129	m_adjacencyList[b].append(a);
130	m_degrees[b]++;
131	}
132	}
133	}
134
135	bool addEdgeDistinctWithoutDegreeChange(IndexType a, IndexType b)
136	{
137	ASSERT(a != b);
138	bool isNewEdge = addInterferenceEdge(InterferenceEdge(a, b));
139	if (isNewEdge) {
140	if (!isPrecolored(a)) {
141	ASSERT(!m_adjacencyList[a].contains(b));
142	m_adjacencyList[a].append(b);
143	}
144
145	if (!isPrecolored(b)) {
146	ASSERT(!m_adjacencyList[b].contains(a));
147	m_adjacencyList[b].append(a);
148	}
149	return true;
150	}
151	return false;
152	}
153
154	template<typename Function>
155	void forEachAdjacent(IndexType tmpIndex, Function function)
156	{
157	for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) {
158	if (!hasBeenSimplified(adjacentTmpIndex))
159	function(adjacentTmpIndex);
160	}
161	}
162
163	bool hasBeenSimplified(IndexType tmpIndex)
164	{
165	if (!ASSERT_DISABLED) {
166	if (!!m_coalescedTmps[tmpIndex])
167	ASSERT(getAlias(tmpIndex) != tmpIndex);
168	}
169
170	return m_isOnSelectStack.quickGet(tmpIndex) \|\| !!m_coalescedTmps[tmpIndex];
171	}
172
173	bool canBeSafelyCoalesced(IndexType u, IndexType v)
174	{
175	ASSERT(!isPrecolored(v));
176	if (isPrecolored(u))
177	return precoloredCoalescingHeuristic(u, v);
178	return conservativeHeuristic(u, v);
179	}
180
181	bool conservativeHeuristic(IndexType u, IndexType v)
182	{
183	// This is using the Briggs' conservative coalescing rule:
184	// If the number of combined adjacent node with a degree >= K is less than K,
185	// it is safe to combine the two nodes. The reason is that we know that if the graph
186	// is colorable, we have fewer than K adjacents with high order and there is a color
187	// for the current node.
188	ASSERT(u != v);
189	ASSERT(!isPrecolored(u));
190	ASSERT(!isPrecolored(v));
191
192	const auto& adjacentsOfU = m_adjacencyList[u];
193	const auto& adjacentsOfV = m_adjacencyList[v];
194
195	if (adjacentsOfU.size() + adjacentsOfV.size() < registerCount()) {
196	// Shortcut: if the total number of adjacents is less than the number of register, the condition is always met.
197	return true;
198	}
199
200	HashSet<IndexType> highOrderAdjacents;
201
202	for (IndexType adjacentTmpIndex : adjacentsOfU) {
203	ASSERT(adjacentTmpIndex != v);
204	ASSERT(adjacentTmpIndex != u);
205	if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) {
206	auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
207	if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount())
208	return false;
209	}
210	}
211	for (IndexType adjacentTmpIndex : adjacentsOfV) {
212	ASSERT(adjacentTmpIndex != u);
213	ASSERT(adjacentTmpIndex != v);
214	if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) {
215	auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
216	if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount())
217	return false;
218	}
219	}
220
221	ASSERT(highOrderAdjacents.size() < registerCount());
222	return true;
223	}
224
225	bool precoloredCoalescingHeuristic(IndexType u, IndexType v)
226	{
227	if (traceDebug)
228	dataLog(" Checking precoloredCoalescingHeuristic\n");
229	ASSERT(isPrecolored(u));
230	ASSERT(!isPrecolored(v));
231
232	// If any adjacent of the non-colored node is not an adjacent of the colored node AND has a degree >= K
233	// there is a risk that this node needs to have the same color as our precolored node. If we coalesce such
234	// move, we may create an uncolorable graph.
235	const auto& adjacentsOfV = m_adjacencyList[v];
236	for (unsigned adjacentTmpIndex : adjacentsOfV) {
237	if (!isPrecolored(adjacentTmpIndex)
238	&& !hasBeenSimplified(adjacentTmpIndex)
239	&& m_degrees[adjacentTmpIndex] >= registerCount()
240	&& !hasInterferenceEdge(InterferenceEdge(u, adjacentTmpIndex)))
241	return false;
242	}
243	return true;
244	}
245
246	void addBias(IndexType u, IndexType v)
247	{
248	// We implement biased coloring as proposed by Briggs. See section
249	// 5.3.3 of his thesis for more information: http://www.cs.utexas.edu/users/mckinley/380C/lecs/briggs-thesis-1992.pdf
250	// The main idea of biased coloring is this:
251	// We try to coalesce a move between u and v, but the conservative heuristic
252	// fails. We don't want coalesce the move because we don't want to risk
253	// creating an uncolorable graph. However, if the conservative heuristic fails,
254	// it's not proof that the graph is uncolorable if the move were indeed coalesced.
255	// So, when we go to color the tmps, we'll remember that we really want the
256	// same register for u and v, and if legal, we will assign them to the same register.
257	if (!isPrecolored(u))
258	m_biases.add(u, IndexTypeSet()).iterator->value.add(v);
259	if (!isPrecolored(v))
260	m_biases.add(v, IndexTypeSet()).iterator->value.add(u);
261	}
262
263	IndexType selectSpill()
264	{
265	if (!m_hasSelectedSpill) {
266	m_hasSelectedSpill = true;
267
268	if (m_hasCoalescedNonTrivialMove)
269	m_coalescedTmpsAtSpill = m_coalescedTmps;
270	}
271
272	auto iterator = m_spillWorklist.begin();
273
274	RELEASE_ASSERT_WITH_MESSAGE(iterator != m_spillWorklist.end(), "selectSpill() called when there was no spill.");
275	RELEASE_ASSERT_WITH_MESSAGE(!m_unspillableTmps.contains(*iterator), "trying to spill unspillable tmp");
276
277	// Higher score means more desirable to spill. Lower scores maximize the likelihood that a tmp
278	// gets a register.
279	auto score = [&] (Tmp tmp) -> double {
280	// Air exposes the concept of "fast tmps", and we interpret that to mean that the tmp
281	// should always be in a register.
282	if (m_code.isFastTmp(tmp))
283	return `0`;
284
285	// All else being equal, the score should be directly related to the degree.
286	double degree = static_cast<double>(m_degrees[TmpMapper::absoluteIndex(tmp)]);
287
288	// All else being equal, the score should be inversely related to the number of warm uses and
289	// defs.
290	const UseCounts<Tmp>::Counts* counts = m_useCounts[tmp];
291	if (!counts)
292	return std::numeric_limits<double>::infinity();
293
294	double uses = counts->numWarmUses + counts->numDefs;
295
296	// If it's a constant, then it's not as bad to spill. We can rematerialize it in many
297	// cases.
298	if (counts->numConstDefs == `1` && counts->numDefs == `1`)
299	uses /= `2`;
300
301	return degree / uses;
302	};
303
304	auto victimIterator = iterator;
305	double maxScore = score(TmpMapper::tmpFromAbsoluteIndex(*iterator));
306
307	++iterator;
308	for (;iterator != m_spillWorklist.end(); ++iterator) {
309	double tmpScore = score(TmpMapper::tmpFromAbsoluteIndex(*iterator));
310	if (tmpScore > maxScore) {
311	ASSERT(!m_unspillableTmps.contains(*iterator));
312	victimIterator = iterator;
313	maxScore = tmpScore;
314	}
315	}
316
317	IndexType victimIndex = *victimIterator;
318	if (traceDebug)
319	dataLogLn("Selecting spill ", victimIndex);
320	ASSERT(!isPrecolored(victimIndex));
321	return victimIndex;
322	}
323
324	void assignColors()
325	{
326	ASSERT(m_simplifyWorklist.isEmpty());
327	ASSERT(m_spillWorklist.isEmpty());
328
329	// Reclaim as much memory as possible.
330	m_interferenceEdges.clear();
331
332	m_degrees.clear();
333	m_moveList.clear();
334	m_simplifyWorklist.clear();
335	m_spillWorklist.clearAll();
336
337	// Try to color the Tmp on the stack.
338	m_coloredTmp.resize(m_adjacencyList.size());
339
340	{
341	Vector<IndexType, `4`> nowAliasedBiases;
342	for (IndexType key : m_biases.keys()) {
343	if (key != getAlias(key))
344	nowAliasedBiases.append(key);
345	}
346	for (IndexType key : nowAliasedBiases) {
347	IndexTypeSet keysBiases(m_biases.take(key));
348	auto addResult = m_biases.add(getAlias(key), IndexTypeSet());
349	if (addResult.isNewEntry) {
350	ASSERT(!addResult.iterator->value.size());
351	addResult.iterator->value = WTFMove(keysBiases);
352	} else {
353	IndexTypeSet& setToAddTo = addResult.iterator->value;
354	for (IndexType tmp : keysBiases)
355	setToAddTo.add(tmp);
356	}
357	}
358	}
359
360	while (!m_selectStack.isEmpty()) {
361	unsigned tmpIndex = m_selectStack.takeLast();
362	ASSERT(!isPrecolored(tmpIndex));
363	ASSERT(!m_coloredTmp[tmpIndex]);
364	ASSERT(getAlias(tmpIndex) == tmpIndex);
365
366	RegisterSet coloredRegisters;
367	for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) {
368	IndexType aliasTmpIndex = getAlias(adjacentTmpIndex);
369	Reg reg = m_coloredTmp[aliasTmpIndex];
370
371	ASSERT(!isPrecolored(aliasTmpIndex) \|\| (isPrecolored(aliasTmpIndex) && reg));
372
373	if (reg)
374	coloredRegisters.set(reg);
375	}
376
377	bool colorAssigned = false;
378	auto iter = m_biases.find(tmpIndex);
379	if (iter != m_biases.end()) {
380	for (IndexType desiredBias : iter->value) {
381	if (Reg desiredColor = m_coloredTmp[getAlias(desiredBias)]) {
382	if (!coloredRegisters.get(desiredColor)) {
383	m_coloredTmp[tmpIndex] = desiredColor;
384	colorAssigned = true;
385	break;
386	}
387	}
388	}
389	}
390	if (!colorAssigned) {
391	for (Reg reg : m_regsInPriorityOrder) {
392	if (!coloredRegisters.get(reg)) {
393	m_coloredTmp[tmpIndex] = reg;
394	colorAssigned = true;
395	break;
396	}
397	}
398	}
399
400	if (!colorAssigned)
401	m_spilledTmps.append(tmpIndex);
402	}
403
404	m_selectStack.clear();
405
406	if (m_spilledTmps.isEmpty())
407	m_coalescedTmpsAtSpill.clear();
408	else
409	m_coloredTmp.clear();
410	}
411
412	void dumpInterferenceGraphInDot(PrintStream& out)
413	{
414	out.print("graph InterferenceGraph { \n");
415
416	HashSet<Tmp> tmpsWithInterferences;
417	for (const auto& edge : m_interferenceEdges) {
418	tmpsWithInterferences.add(TmpMapper::tmpFromAbsoluteIndex(edge.first()));
419	tmpsWithInterferences.add(TmpMapper::tmpFromAbsoluteIndex(edge.second()));
420	}
421
422	for (const auto& tmp : tmpsWithInterferences) {
423	unsigned tmpIndex = TmpMapper::absoluteIndex(tmp);
424	if (tmpIndex < m_degrees.size())
425	out.print(" ", tmp.internalValue(), " [label=\"", tmp, " (", m_degrees[tmpIndex], ")\"];\n");
426	else
427	out.print(" ", tmp.internalValue(), " [label=\"", tmp, "\"];\n");
428	}
429
430	for (const auto& edge : m_interferenceEdges)
431	out.print(" ", edge.first(), " -- ", edge.second(), ";\n");
432	out.print("}\n");
433	}
434
435	// Interference edges are not directed. An edge between any two Tmps is represented
436	// by the concatenated values of the smallest Tmp followed by the bigger Tmp.
437	class InterferenceEdge {
438	public:
439	InterferenceEdge()
440	{
441	}
442
443	InterferenceEdge(IndexType a, IndexType b)
444	{
445	ASSERT(a);
446	ASSERT(b);
447	ASSERT_WITH_MESSAGE(a != b, "A Tmp can never interfere with itself. Doing so would force it to be the superposition of two registers.");
448
449	if (b < a)
450	std::swap(a, b);
451	m_value = static_cast<uint64_t>(a) << `32` \| b;
452	}
453
454	InterferenceEdge(WTF::HashTableDeletedValueType)
455	: m_value(std::numeric_limits<uint64_t>::max())
456	{
457	}
458
459	IndexType first() const
460	{
461	return m_value >> `32` & `0xffffffff`;
462	}
463
464	IndexType second() const
465	{
466	return m_value & `0xffffffff`;
467	}
468
469	bool operator==(const InterferenceEdge& other) const
470	{
471	return m_value == other.m_value;
472	}
473
474	bool isHashTableDeletedValue() const
475	{
476	return *this == InterferenceEdge(WTF::HashTableDeletedValue);
477	}
478
479	unsigned hash() const
480	{
481	return WTF::IntHash<uint64_t>::hash(m_value);
482	}
483
484	void dump(PrintStream& out) const
485	{
486	out.print(first(), "<=>", second());
487	}
488
489	private:
490	uint64_t m_value { `0` };
491	};
492
493	bool addInterferenceEdge(InterferenceEdge edge)
494	{
495	return m_interferenceEdges.add(edge).isNewEntry;
496	}
497
498	bool hasInterferenceEdge(InterferenceEdge edge)
499	{
500	return m_interferenceEdges.contains(edge);
501	}
502
503	struct InterferenceEdgeHash {
504	static unsigned hash(const InterferenceEdge& key) { return key.hash(); }
505	static bool equal(const InterferenceEdge& a, const InterferenceEdge& b) { return a == b; }
506	static const bool safeToCompareToEmptyOrDeleted = true;
507	};
508	typedef SimpleClassHashTraits<InterferenceEdge> InterferenceEdgeHashTraits;
509
510	Vector<Reg> m_regsInPriorityOrder;
511	IndexType m_lastPrecoloredRegisterIndex { `0` };
512
513	// The interference graph.
514	HashSet<InterferenceEdge, InterferenceEdgeHash, InterferenceEdgeHashTraits> m_interferenceEdges;
515
516	Vector<Vector<IndexType, `0`, UnsafeVectorOverflow, `4`>, `0`, UnsafeVectorOverflow> m_adjacencyList;
517	Vector<IndexType, `0`, UnsafeVectorOverflow> m_degrees;
518
519	using IndexTypeSet = HashSet<IndexType, typename DefaultHash<IndexType>::Hash, WTF::UnsignedWithZeroKeyHashTraits<IndexType>>;
520
521	HashMap<IndexType, IndexTypeSet, typename DefaultHash<IndexType>::Hash, WTF::UnsignedWithZeroKeyHashTraits<IndexType>> m_biases;
522
523	// Instead of keeping track of the move instructions, we just keep their operands around and use the index
524	// in the vector as the "identifier" for the move.
525	struct MoveOperands {
526	IndexType srcIndex;
527	IndexType dstIndex;
528	};
529	Vector<MoveOperands, `0`, UnsafeVectorOverflow> m_coalescingCandidates;
530
531	// List of every move instruction associated with a Tmp.
532	Vector<IndexTypeSet> m_moveList;
533
534	// Colors.
535	Vector<Reg, `0`, UnsafeVectorOverflow> m_coloredTmp;
536	Vector<IndexType> m_spilledTmps;
537
538	// Values that have been coalesced with an other value.
539	Vector<IndexType, `0`, UnsafeVectorOverflow> m_coalescedTmps;
540
541	// The stack of Tmp removed from the graph and ready for coloring.
542	BitVector m_isOnSelectStack;
543	Vector<IndexType> m_selectStack;
544
545	// Low-degree, non-Move related.
546	Vector<IndexType> m_simplifyWorklist;
547	// High-degree Tmp.
548	BitVector m_spillWorklist;
549
550	bool m_hasSelectedSpill { false };
551	bool m_hasCoalescedNonTrivialMove { false };
552
553	// The mapping of Tmp to their alias for Moves that are always coalescing regardless of spilling.
554	Vector<IndexType, `0`, UnsafeVectorOverflow> m_coalescedTmpsAtSpill;
555
556	const HashSet<unsigned>& m_unspillableTmps;
557	const UseCounts<Tmp>& m_useCounts;
558	Code& m_code;
559
560	Vector<Tmp, `4`> m_pinnedRegs;
561	};
562
563	template <typename IndexType, typename TmpMapper>
564	class Briggs : public AbstractColoringAllocator<IndexType, TmpMapper> {
565	using Base = AbstractColoringAllocator<IndexType, TmpMapper>;
566	protected:
567	using Base::m_isOnSelectStack;
568	using Base::m_selectStack;
569	using Base::m_simplifyWorklist;
570	using Base::m_spillWorklist;
571	using Base::m_hasSelectedSpill;
572	using Base::m_hasCoalescedNonTrivialMove;
573	using Base::m_degrees;
574	using Base::registerCount;
575	using Base::m_coalescedTmps;
576	using Base::m_moveList;
577	using Base::m_useCounts;
578	using Base::m_coalescedTmpsAtSpill;
579	using Base::m_spilledTmps;
580	using MoveOperands = typename Base::MoveOperands;
581	using Base::m_coalescingCandidates;
582	using Base::m_lastPrecoloredRegisterIndex;
583	using Base::m_coloredTmp;
584	using Base::m_code;
585	using InterferenceEdge = typename Base::InterferenceEdge;
586	using Base::m_unspillableTmps;
587	using Base::hasInterferenceEdge;
588	using Base::getAlias;
589	using Base::addEdge;
590	using Base::isPrecolored;
591	using Base::canBeSafelyCoalesced;
592	using Base::addEdgeDistinctWithoutDegreeChange;
593	using Base::forEachAdjacent;
594	using Base::hasBeenSimplified;
595	using Base::addToSpill;
596	using Base::m_interferenceEdges;
597	using Base::addBias;
598	using Base::m_pinnedRegs;
599	using Base::m_regsInPriorityOrder;
600
601	public:
602	Briggs(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts)
603	: Base(code, regsInPriorityOrder, lastPrecoloredRegisterIndex, tmpArraySize, unspillableTmps, useCounts)
604	{
605	}
606
607	void allocate()
608	{
609	bool changed = false;
610
611	auto coalesceMove = [&] (unsigned& move) {
612	bool coalesced = coalesce(move);
613	if (coalesced) {
614	changed = true;
615	// We won't need to handle this move in the future since it's already coalesced.
616	move = UINT_MAX;
617	}
618	};
619
620	// We first coalesce until we can't coalesce any more.
621	do {
622	changed = false;
623	m_worklistMoves.forEachMove(coalesceMove);
624	} while (changed);
625	do {
626	changed = false;
627	m_worklistMoves.forEachLowPriorityMove(coalesceMove);
628	} while (changed);
629
630	// Then we create our select stack. The invariant we start with here is that nodes in
631	// the interference graph with degree >= k are on the spill list. Nodes with degree < k
632	// are on the simplify worklist. A node can move from the spill list to the simplify
633	// list (but not the other way around, note that this is different than IRC because IRC
634	// runs this while coalescing, but we do all our coalescing before this). Once a node is
635	// added to the select stack, it's not on either list, but only on the select stack.
636	// Once on the select stack, logically, it's no longer in the interference graph.
637	auto assertInvariants = [&] () {
638	if (ASSERT_DISABLED)
639	return;
640	if (!shouldValidateIRAtEachPhase())
641	return;
642
643	IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + `1`;
644	unsigned registerCount = this->registerCount();
645	for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) {
646	if (getAlias(i) != i)
647	continue;
648	if (m_isOnSelectStack.contains(i)) {
649	ASSERT(!m_simplifyWorklist.contains(i) && !m_spillWorklist.contains(i));
650	continue;
651	}
652	unsigned degree = m_degrees[i];
653	if (degree >= registerCount) {
654	ASSERT(m_unspillableTmps.contains(i) \|\| m_spillWorklist.contains(i));
655	ASSERT(!m_simplifyWorklist.contains(i));
656	continue;
657	}
658	ASSERT(m_simplifyWorklist.contains(i));
659	}
660	};
661
662	makeInitialWorklist();
663	assertInvariants();
664	do {
665	changed = false;
666
667	while (m_simplifyWorklist.size()) {
668	simplify();
669	assertInvariants();
670	}
671
672	if (!m_spillWorklist.isEmpty()) {
673	selectSpill();
674	changed = true;
675	ASSERT(m_simplifyWorklist.size() == `1`);
676	}
677	} while (changed);
678
679	if (!ASSERT_DISABLED) {
680	ASSERT(!m_simplifyWorklist.size());
681	ASSERT(m_spillWorklist.isEmpty());
682	IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + `1`;
683	for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i)
684	ASSERT(hasBeenSimplified(i));
685	}
686
687	assignColors();
688	}
689
690	protected:
691
692	bool coalesce(unsigned& moveIndex)
693	{
694	const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex];
695	IndexType u = getAlias(moveOperands.srcIndex);
696	IndexType v = getAlias(moveOperands.dstIndex);
697
698	if (isPrecolored(v))
699	std::swap(u, v);
700
701	if (traceDebug)
702	dataLog("Coalescing move at index ", moveIndex, " u = ", TmpMapper::tmpFromAbsoluteIndex(u), " v = ", TmpMapper::tmpFromAbsoluteIndex(v), " ");
703
704	if (u == v) {
705	if (traceDebug)
706	dataLog("Already Coalesced. They're equal.\n");
707	return false;
708	}
709
710	if (isPrecolored(v)
711	\|\| hasInterferenceEdge(InterferenceEdge(u, v))) {
712
713	// No need to ever consider this move again if it interferes.
714	// No coalescing will remove the interference.
715	moveIndex = UINT_MAX;
716
717	if (!ASSERT_DISABLED) {
718	if (isPrecolored(v))
719	ASSERT(isPrecolored(u));
720	}
721
722	if (traceDebug)
723	dataLog("Constrained\n");
724
725	return false;
726	}
727
728	if (canBeSafelyCoalesced(u, v)) {
729	combine(u, v);
730	m_hasCoalescedNonTrivialMove = true;
731
732	if (traceDebug)
733	dataLog(" Safe Coalescing\n");
734	return true;
735	}
736
737	addBias(u, v);
738
739	if (traceDebug)
740	dataLog(" Failed coalescing.\n");
741
742	return false;
743	}
744
745	void combine(IndexType u, IndexType v)
746	{
747	ASSERT(!m_coalescedTmps[v]);
748	m_coalescedTmps[v] = u;
749
750	auto& vMoves = m_moveList[v];
751	m_moveList[u].add(vMoves.begin(), vMoves.end());
752
753	forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) {
754	if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) {
755	// If we added a new edge between the adjacentTmp and u, it replaces the edge
756	// that existed with v.
757	// The degree of adjacentTmp remains the same since the edge just changed from u to v.
758	// All we need to do is update the degree of u.
759	if (!isPrecolored(u))
760	m_degrees[u]++;
761	} else {
762	// If we already had an edge between the adjacentTmp and u, the degree of u
763	// is already correct. The degree of the adjacentTmp decreases since the edge
764	// with v is no longer relevant (we can think of it as merged with the edge with u).
765	decrementDegree(adjacentTmpIndex);
766	}
767	});
768	}
769
770
771	void makeInitialWorklist()
772	{
773	m_simplifyWorklist.clear();
774	m_spillWorklist.clearAll();
775
776	if (traceDebug)
777	dataLogLn("------------------\nMaking initial worklist");
778
779	IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + `1`;
780	unsigned registerCount = this->registerCount();
781	for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) {
782	ASSERT(!isPrecolored(i));
783	if (hasBeenSimplified(i))
784	continue;
785	unsigned degree = m_degrees[i];
786	if (degree < registerCount) {
787	if (traceDebug)
788	dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to simplify worklist");
789	m_simplifyWorklist.append(i);
790	} else {
791	if (traceDebug)
792	dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to spill worklist");
793	addToSpill(i);
794	}
795	}
796	}
797
798	// Low-degree vertex can always be colored: just pick any of the color taken by any
799	// other adjacent verices.
800	// The "Simplify" phase takes a low-degree out of the interference graph to simplify it.
801	void simplify()
802	{
803	IndexType lastIndex = m_simplifyWorklist.takeLast();
804
805	ASSERT(!m_selectStack.contains(lastIndex));
806	ASSERT(!m_isOnSelectStack.get(lastIndex));
807	ASSERT(!m_spillWorklist.contains(lastIndex));
808
809	m_selectStack.append(lastIndex);
810	m_isOnSelectStack.quickSet(lastIndex);
811
812	if (traceDebug)
813	dataLogLn("Simplifying ", lastIndex, " by adding it to select stack");
814
815	forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) {
816	decrementDegreeInSimplification(adjacentTmpIndex);
817	});
818	}
819
820	void selectSpill()
821	{
822	IndexType victimIndex = Base::selectSpill();
823	m_spillWorklist.quickClear(victimIndex);
824	m_simplifyWorklist.append(victimIndex);
825	}
826
827	struct MoveSet {
828	unsigned addMove()
829	{
830	ASSERT(m_lowPriorityMoveList.isEmpty());
831
832	unsigned nextIndex = m_positionInMoveList++;
833	m_moveList.append(nextIndex);
834	return nextIndex;
835	}
836
837	void startAddingLowPriorityMoves()
838	{
839	ASSERT(m_lowPriorityMoveList.isEmpty());
840	}
841
842	unsigned addLowPriorityMove()
843	{
844	unsigned nextIndex = m_positionInMoveList++;
845	m_lowPriorityMoveList.append(nextIndex);
846
847	return nextIndex;
848	}
849
850	// We use references to moves here because the function
851	// may do coalescing, indicating it doesn't need to see
852	// the move again.
853	template <typename Function>
854	void forEachMove(Function function)
855	{
856	for (unsigned& move : m_moveList) {
857	if (move != UINT_MAX)
858	function(move);
859	}
860	}
861
862	template <typename Function>
863	void forEachLowPriorityMove(Function function)
864	{
865	for (unsigned& move : m_lowPriorityMoveList) {
866	if (move != UINT_MAX)
867	function(move);
868	}
869	}
870
871	void clear()
872	{
873	m_positionInMoveList = `0`;
874	m_moveList.clear();
875	m_lowPriorityMoveList.clear();
876	}
877
878	private:
879	unsigned m_positionInMoveList;
880	Vector<unsigned, `0`, UnsafeVectorOverflow> m_moveList;
881	Vector<unsigned, `0`, UnsafeVectorOverflow> m_lowPriorityMoveList;
882	};
883
884	void decrementDegree(IndexType tmpIndex)
885	{
886	ASSERT(m_degrees[tmpIndex]);
887	--m_degrees[tmpIndex];
888	}
889
890	void decrementDegreeInSimplification(IndexType tmpIndex)
891	{
892	ASSERT(m_degrees[tmpIndex]);
893	unsigned oldDegree = m_degrees[tmpIndex]--;
894
895	if (oldDegree == registerCount()) {
896	ASSERT(m_degrees[tmpIndex] < registerCount());
897	if (traceDebug)
898	dataLogLn("Moving tmp ", tmpIndex, " from spill list to simplify list because it's degree is now less than k");
899
900	if (!ASSERT_DISABLED)
901	ASSERT(m_unspillableTmps.contains(tmpIndex) \|\| m_spillWorklist.contains(tmpIndex));
902	m_spillWorklist.quickClear(tmpIndex);
903
904	ASSERT(!m_simplifyWorklist.contains(tmpIndex));
905	m_simplifyWorklist.append(tmpIndex);
906	}
907	}
908
909	void assignColors()
910	{
911	m_worklistMoves.clear();
912	Base::assignColors();
913	}
914
915	// Set of "move" enabled for possible coalescing.
916	MoveSet m_worklistMoves;
917	};
918
919	template <typename IndexType, typename TmpMapper>
920	class IRC : public AbstractColoringAllocator<IndexType, TmpMapper> {
921	using Base = AbstractColoringAllocator<IndexType, TmpMapper>;
922	protected:
923	using Base::m_isOnSelectStack;
924	using Base::m_selectStack;
925	using Base::m_simplifyWorklist;
926	using Base::m_spillWorklist;
927	using Base::m_hasSelectedSpill;
928	using Base::m_hasCoalescedNonTrivialMove;
929	using Base::m_degrees;
930	using Base::registerCount;
931	using Base::m_coalescedTmps;
932	using Base::m_moveList;
933	using Base::m_useCounts;
934	using Base::m_coalescedTmpsAtSpill;
935	using Base::m_spilledTmps;
936	using MoveOperands = typename Base::MoveOperands;
937	using Base::m_coalescingCandidates;
938	using Base::m_lastPrecoloredRegisterIndex;
939	using Base::m_coloredTmp;
940	using Base::m_code;
941	using InterferenceEdge = typename Base::InterferenceEdge;
942	using Base::m_unspillableTmps;
943	using Base::hasInterferenceEdge;
944	using Base::getAlias;
945	using Base::addEdge;
946	using Base::isPrecolored;
947	using Base::canBeSafelyCoalesced;
948	using Base::addEdgeDistinctWithoutDegreeChange;
949	using Base::forEachAdjacent;
950	using Base::hasBeenSimplified;
951	using Base::addToSpill;
952	using Base::m_interferenceEdges;
953	using Base::m_adjacencyList;
954	using Base::dumpInterferenceGraphInDot;
955	using Base::addBias;
956	using Base::m_pinnedRegs;
957	using Base::m_regsInPriorityOrder;
958
959	public:
960	IRC(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts)
961	: Base(code, regsInPriorityOrder, lastPrecoloredRegisterIndex, tmpArraySize, unspillableTmps, useCounts)
962	{
963	}
964
965	void allocate()
966	{
967	m_activeMoves.ensureSize(m_worklistMoves.totalNumberOfMoves());
968	ASSERT_WITH_MESSAGE(m_activeMoves.size() >= m_coalescingCandidates.size(), "The activeMove set should be big enough for the quick operations of BitVector.");
969
970	makeWorkList();
971
972	if (debug) {
973	dumpInterferenceGraphInDot(WTF::dataFile());
974	dataLog("Coalescing candidates:\n");
975	for (MoveOperands& moveOp : m_coalescingCandidates) {
976	dataLog(" ", TmpMapper::tmpFromAbsoluteIndex(moveOp.srcIndex),
977	" -> ", TmpMapper::tmpFromAbsoluteIndex(moveOp.dstIndex), "\n");
978	}
979	dataLog("Initial work list\n");
980	dumpWorkLists(WTF::dataFile());
981	}
982
983	do {
984	if (traceDebug) {
985	dataLog("Before Graph simplification iteration\n");
986	dumpWorkLists(WTF::dataFile());
987	}
988
989	if (!m_simplifyWorklist.isEmpty())
990	simplify();
991	else if (!m_worklistMoves.isEmpty())
992	coalesce();
993	else if (!m_freezeWorklist.isEmpty())
994	freeze();
995	else if (!m_spillWorklist.isEmpty())
996	selectSpill();
997
998	if (traceDebug) {
999	dataLog("After Graph simplification iteration\n");
1000	dumpWorkLists(WTF::dataFile());
1001	}
1002	} while (!m_simplifyWorklist.isEmpty() \|\| !m_worklistMoves.isEmpty() \|\| !m_freezeWorklist.isEmpty() \|\| !m_spillWorklist.isEmpty());
1003
1004	assignColors();
1005	}
1006
1007	protected:
1008
1009	void makeWorkList()
1010	{
1011	IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + `1`;
1012	for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) {
1013	unsigned degree = m_degrees[i];
1014	if (degree >= registerCount())
1015	addToSpill(i);
1016	else if (!m_moveList[i].isEmpty())
1017	m_freezeWorklist.add(i);
1018	else
1019	m_simplifyWorklist.append(i);
1020	}
1021	}
1022
1023	// Low-degree vertex can always be colored: just pick any of the color taken by any
1024	// other adjacent verices.
1025	// The "Simplify" phase takes a low-degree out of the interference graph to simplify it.
1026	void simplify()
1027	{
1028	IndexType lastIndex = m_simplifyWorklist.takeLast();
1029
1030	ASSERT(!m_selectStack.contains(lastIndex));
1031	ASSERT(!m_isOnSelectStack.get(lastIndex));
1032	m_selectStack.append(lastIndex);
1033	m_isOnSelectStack.quickSet(lastIndex);
1034
1035	forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) {
1036	decrementDegree(adjacentTmpIndex);
1037	});
1038	}
1039
1040	void coalesce()
1041	{
1042	unsigned moveIndex = m_worklistMoves.takeLastMove();
1043	const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex];
1044	IndexType u = getAlias(moveOperands.srcIndex);
1045	IndexType v = getAlias(moveOperands.dstIndex);
1046
1047	if (isPrecolored(v))
1048	std::swap(u, v);
1049
1050	if (traceDebug)
1051	dataLog("Coalescing move at index ", moveIndex, " u = ", u, " v = ", v, "\n");
1052
1053	if (u == v) {
1054	addWorkList(u);
1055
1056	if (traceDebug)
1057	dataLog(" Coalesced\n");
1058	} else if (isPrecolored(v)
1059	\|\| hasInterferenceEdge(InterferenceEdge(u, v))) {
1060	addWorkList(u);
1061	addWorkList(v);
1062
1063	if (traceDebug)
1064	dataLog(" Constrained\n");
1065	} else if (canBeSafelyCoalesced(u, v)) {
1066	combine(u, v);
1067	addWorkList(u);
1068	m_hasCoalescedNonTrivialMove = true;
1069
1070	if (traceDebug)
1071	dataLog(" Safe Coalescing\n");
1072	} else {
1073	m_activeMoves.quickSet(moveIndex);
1074	if (traceDebug)
1075	dataLog(" Failed coalescing, added to active moves.\n");
1076
1077	addBias(u, v);
1078	}
1079	}
1080
1081	void addWorkList(IndexType tmpIndex)
1082	{
1083	if (!isPrecolored(tmpIndex) && m_degrees[tmpIndex] < registerCount() && !isMoveRelated(tmpIndex)) {
1084	m_freezeWorklist.remove(tmpIndex);
1085	m_simplifyWorklist.append(tmpIndex);
1086	}
1087	}
1088
1089	void combine(IndexType u, IndexType v)
1090	{
1091	if (!m_freezeWorklist.remove(v))
1092	m_spillWorklist.quickClear(v);
1093
1094	ASSERT(!m_coalescedTmps[v]);
1095	m_coalescedTmps[v] = u;
1096
1097	auto& vMoves = m_moveList[v];
1098	m_moveList[u].add(vMoves.begin(), vMoves.end());
1099
1100	forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) {
1101	if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) {
1102	// If we added a new edge between the adjacentTmp and u, it replaces the edge
1103	// that existed with v.
1104	// The degree of adjacentTmp remains the same since the edge just changed from u to v.
1105	// All we need to do is update the degree of u.
1106	if (!isPrecolored(u))
1107	m_degrees[u]++;
1108	} else {
1109	// If we already had an edge between the adjacentTmp and u, the degree of u
1110	// is already correct. The degree of the adjacentTmp decreases since the edge
1111	// with v is no longer relevant (we can think of it as merged with the edge with u).
1112	decrementDegree(adjacentTmpIndex);
1113	}
1114	});
1115
1116	if (m_degrees[u] >= registerCount() && m_freezeWorklist.remove(u))
1117	addToSpill(u);
1118	}
1119
1120	void freeze()
1121	{
1122	IndexType victimIndex = m_freezeWorklist.takeAny();
1123	ASSERT_WITH_MESSAGE(getAlias(victimIndex) == victimIndex, "coalesce() should not leave aliased Tmp in the worklist.");
1124	m_simplifyWorklist.append(victimIndex);
1125	freezeMoves(victimIndex);
1126	}
1127
1128	void freezeMoves(IndexType tmpIndex)
1129	{
1130	forEachNodeMoves(tmpIndex, [this, tmpIndex] (IndexType moveIndex) {
1131	if (!m_activeMoves.quickClear(moveIndex))
1132	m_worklistMoves.takeMove(moveIndex);
1133
1134	const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex];
1135	IndexType srcTmpIndex = moveOperands.srcIndex;
1136	IndexType dstTmpIndex = moveOperands.dstIndex;
1137
1138	IndexType originalOtherTmp = srcTmpIndex != tmpIndex ? srcTmpIndex : dstTmpIndex;
1139	IndexType otherTmpIndex = getAlias(originalOtherTmp);
1140	if (m_degrees[otherTmpIndex] < registerCount() && !isMoveRelated(otherTmpIndex)) {
1141	if (m_freezeWorklist.remove(otherTmpIndex))
1142	m_simplifyWorklist.append(otherTmpIndex);
1143	}
1144	});
1145	}
1146
1147	void decrementDegree(IndexType tmpIndex)
1148	{
1149	ASSERT(m_degrees[tmpIndex]);
1150
1151	unsigned oldDegree = m_degrees[tmpIndex]--;
1152	if (oldDegree == registerCount()) {
1153	enableMovesOnValueAndAdjacents(tmpIndex);
1154	m_spillWorklist.quickClear(tmpIndex);
1155	if (isMoveRelated(tmpIndex))
1156	m_freezeWorklist.add(tmpIndex);
1157	else
1158	m_simplifyWorklist.append(tmpIndex);
1159	}
1160	}
1161
1162	void selectSpill()
1163	{
1164	IndexType victimIndex = Base::selectSpill();
1165	m_spillWorklist.quickClear(victimIndex);
1166	m_simplifyWorklist.append(victimIndex);
1167	freezeMoves(victimIndex);
1168	}
1169
1170	void assignColors()
1171	{
1172	ASSERT(m_freezeWorklist.isEmpty());
1173	m_worklistMoves.clear();
1174	Base::assignColors();
1175	}
1176
1177
1178	bool isMoveRelated(IndexType tmpIndex)
1179	{
1180	for (unsigned moveIndex : m_moveList[tmpIndex]) {
1181	if (m_activeMoves.quickGet(moveIndex) \|\| m_worklistMoves.contains(moveIndex))
1182	return true;
1183	}
1184	return false;
1185	}
1186
1187	template<typename Function>
1188	void forEachNodeMoves(IndexType tmpIndex, Function function)
1189	{
1190	for (unsigned moveIndex : m_moveList[tmpIndex]) {
1191	if (m_activeMoves.quickGet(moveIndex) \|\| m_worklistMoves.contains(moveIndex))
1192	function(moveIndex);
1193	}
1194	}
1195
1196	void enableMovesOnValue(IndexType tmpIndex)
1197	{
1198	for (unsigned moveIndex : m_moveList[tmpIndex]) {
1199	if (m_activeMoves.quickClear(moveIndex))
1200	m_worklistMoves.returnMove(moveIndex);
1201	}
1202	}
1203
1204	void enableMovesOnValueAndAdjacents(IndexType tmpIndex)
1205	{
1206	enableMovesOnValue(tmpIndex);
1207
1208	forEachAdjacent(tmpIndex, [this] (IndexType adjacentTmpIndex) {
1209	enableMovesOnValue(adjacentTmpIndex);
1210	});
1211	}
1212
1213	struct OrderedMoveSet {
1214	unsigned addMove()
1215	{
1216	ASSERT(m_lowPriorityMoveList.isEmpty());
1217	ASSERT(!m_firstLowPriorityMoveIndex);
1218
1219	unsigned nextIndex = m_positionInMoveList.size();
1220	unsigned position = m_moveList.size();
1221	m_moveList.append(nextIndex);
1222	m_positionInMoveList.append(position);
1223	return nextIndex;
1224	}
1225
1226	void startAddingLowPriorityMoves()
1227	{
1228	ASSERT(m_lowPriorityMoveList.isEmpty());
1229	m_firstLowPriorityMoveIndex = m_moveList.size();
1230	}
1231
1232	unsigned addLowPriorityMove()
1233	{
1234	ASSERT(m_firstLowPriorityMoveIndex == m_moveList.size());
1235
1236	unsigned nextIndex = m_positionInMoveList.size();
1237	unsigned position = m_lowPriorityMoveList.size();
1238	m_lowPriorityMoveList.append(nextIndex);
1239	m_positionInMoveList.append(position);
1240
1241	ASSERT(nextIndex >= m_firstLowPriorityMoveIndex);
1242
1243	return nextIndex;
1244	}
1245
1246	bool isEmpty() const
1247	{
1248	return m_moveList.isEmpty() && m_lowPriorityMoveList.isEmpty();
1249	}
1250
1251	bool contains(unsigned index)
1252	{
1253	return m_positionInMoveList[index] != std::numeric_limits<unsigned>::max();
1254	}
1255
1256	void takeMove(unsigned moveIndex)
1257	{
1258	unsigned positionInMoveList = m_positionInMoveList[moveIndex];
1259	if (positionInMoveList == std::numeric_limits<unsigned>::max())
1260	return;
1261
1262	if (moveIndex < m_firstLowPriorityMoveIndex) {
1263	ASSERT(m_moveList[positionInMoveList] == moveIndex);
1264	unsigned lastIndex = m_moveList.last();
1265	m_positionInMoveList[lastIndex] = positionInMoveList;
1266	m_moveList[positionInMoveList] = lastIndex;
1267	m_moveList.removeLast();
1268	} else {
1269	ASSERT(m_lowPriorityMoveList[positionInMoveList] == moveIndex);
1270	unsigned lastIndex = m_lowPriorityMoveList.last();
1271	m_positionInMoveList[lastIndex] = positionInMoveList;
1272	m_lowPriorityMoveList[positionInMoveList] = lastIndex;
1273	m_lowPriorityMoveList.removeLast();
1274	}
1275
1276	m_positionInMoveList[moveIndex] = std::numeric_limits<unsigned>::max();
1277
1278	ASSERT(!contains(moveIndex));
1279	}
1280
1281	unsigned takeLastMove()
1282	{
1283	ASSERT(!isEmpty());
1284
1285	unsigned lastIndex;
1286	if (!m_moveList.isEmpty()) {
1287	lastIndex = m_moveList.takeLast();
1288	ASSERT(m_positionInMoveList[lastIndex] == m_moveList.size());
1289	} else {
1290	lastIndex = m_lowPriorityMoveList.takeLast();
1291	ASSERT(m_positionInMoveList[lastIndex] == m_lowPriorityMoveList.size());
1292	}
1293	m_positionInMoveList[lastIndex] = std::numeric_limits<unsigned>::max();
1294
1295	ASSERT(!contains(lastIndex));
1296	return lastIndex;
1297	}
1298
1299	void returnMove(unsigned index)
1300	{
1301	// This assertion is a bit strict but that is how the move list should be used. The only kind of moves that can
1302	// return to the list are the ones that we previously failed to coalesce with the conservative heuristics.
1303	// Values should not be added back if they were never taken out when attempting coalescing.
1304	ASSERT(!contains(index));
1305
1306	if (index < m_firstLowPriorityMoveIndex) {
1307	unsigned position = m_moveList.size();
1308	m_moveList.append(index);
1309	m_positionInMoveList[index] = position;
1310	} else {
1311	unsigned position = m_lowPriorityMoveList.size();
1312	m_lowPriorityMoveList.append(index);
1313	m_positionInMoveList[index] = position;
1314	}
1315
1316	ASSERT(contains(index));
1317	}
1318
1319	void clear()
1320	{
1321	m_positionInMoveList.clear();
1322	m_moveList.clear();
1323	m_lowPriorityMoveList.clear();
1324	}
1325
1326	unsigned totalNumberOfMoves()
1327	{
1328	return m_moveList.size() + m_lowPriorityMoveList.size();
1329	}
1330
1331	private:
1332	Vector<unsigned, `0`, UnsafeVectorOverflow> m_positionInMoveList;
1333	Vector<unsigned, `0`, UnsafeVectorOverflow> m_moveList;
1334	Vector<unsigned, `0`, UnsafeVectorOverflow> m_lowPriorityMoveList;
1335	unsigned m_firstLowPriorityMoveIndex { `0` };
1336	};
1337
1338	void dumpWorkLists(PrintStream& out)
1339	{
1340	out.print("Simplify work list:\n");
1341	for (unsigned tmpIndex : m_simplifyWorklist)
1342	out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n");
1343	out.printf("Moves work list is empty? %d\n", m_worklistMoves.isEmpty());
1344	out.print("Freeze work list:\n");
1345	for (unsigned tmpIndex : m_freezeWorklist)
1346	out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n");
1347	out.print("Spill work list:\n");
1348	for (unsigned tmpIndex : m_spillWorklist)
1349	out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n");
1350	}
1351
1352	// Work lists.
1353	// Low-degree, Move related.
1354	HashSet<IndexType> m_freezeWorklist;
1355	// Set of "move" enabled for possible coalescing.
1356	OrderedMoveSet m_worklistMoves;
1357	// Set of "move" not yet ready for coalescing.
1358	BitVector m_activeMoves;
1359	};
1360
1361	// This perform all the tasks that are specific to certain register type.
1362	template<Bank bank, template<typename, typename> class AllocatorType>
1363	class ColoringAllocator : public AllocatorType<unsigned, AbsoluteTmpMapper<bank>> {
1364	using TmpMapper = AbsoluteTmpMapper<bank>;
1365	using Base = AllocatorType<unsigned, TmpMapper>;
1366	using Base::m_isOnSelectStack;
1367	using Base::m_selectStack;
1368	using Base::m_simplifyWorklist;
1369	using Base::m_spillWorklist;
1370	using Base::m_hasSelectedSpill;
1371	using Base::m_hasCoalescedNonTrivialMove;
1372	using Base::m_degrees;
1373	using Base::registerCount;
1374	using Base::m_coalescedTmps;
1375	using Base::m_moveList;
1376	using Base::m_useCounts;
1377	using Base::m_coalescedTmpsAtSpill;
1378	using Base::m_spilledTmps;
1379	using MoveOperands = typename Base::MoveOperands;
1380	using Base::m_coalescingCandidates;
1381	using Base::m_lastPrecoloredRegisterIndex;
1382	using Base::m_coloredTmp;
1383	using Base::m_code;
1384	using InterferenceEdge = typename Base::InterferenceEdge;
1385	using Base::m_worklistMoves;
1386	using Base::hasInterferenceEdge;
1387	using Base::getAlias;
1388	using Base::addEdge;
1389	using Base::m_interferenceEdges;
1390	using Base::m_pinnedRegs;
1391	using Base::m_regsInPriorityOrder;
1392
1393	public:
1394
1395	ColoringAllocator(Code& code, TmpWidth& tmpWidth, const UseCounts<Tmp>& useCounts, const HashSet<unsigned>& unspillableTmp)
1396	: Base(code, code.regsInPriorityOrder(bank), TmpMapper::lastMachineRegisterIndex(), tmpArraySize(code), unspillableTmp, useCounts)
1397	, m_tmpWidth(tmpWidth)
1398	{
1399	for (Reg reg : code.pinnedRegisters()) {
1400	if ((bank == GP && reg.isGPR()) \|\| (bank == FP && reg.isFPR())) {
1401	m_pinnedRegs.append(Tmp (reg));
1402	ASSERT(!m_regsInPriorityOrder.contains(reg));
1403	m_regsInPriorityOrder.append(reg);
1404	}
1405	}
1406
1407	initializePrecoloredTmp();
1408	build();
1409	}
1410
1411	Tmp getAlias(Tmp tmp) const
1412	{
1413	return TmpMapper::tmpFromAbsoluteIndex(getAlias(TmpMapper::absoluteIndex(tmp)));
1414	}
1415
1416	// This tells you if a Move will be coalescable if the src and dst end up matching. This method
1417	// relies on an analysis that is invalidated by register allocation, so you it's only meaningful to
1418	// call this before* replacing the Tmp's in this Inst with registers or spill slots.*
1419	bool mayBeCoalescable(const Inst& inst) const
1420	{
1421	return mayBeCoalescableImpl(inst, &m_tmpWidth);
1422	}
1423
1424	bool isUselessMove(const Inst& inst) const
1425	{
1426	return mayBeCoalescableImpl(inst, nullptr) && inst.args [`0`].tmp() == inst.args [`1`].tmp();
1427	}
1428
1429	Tmp getAliasWhenSpilling(Tmp tmp) const
1430	{
1431	ASSERT_WITH_MESSAGE(!m_spilledTmps.isEmpty(), "This function is only valid for coalescing during spilling.");
1432
1433	if (m_coalescedTmpsAtSpill.isEmpty())
1434	return tmp;
1435
1436	unsigned aliasIndex = TmpMapper::absoluteIndex(tmp);
1437	while (unsigned nextAliasIndex = m_coalescedTmpsAtSpill[aliasIndex])
1438	aliasIndex = nextAliasIndex;
1439
1440	Tmp alias = TmpMapper::tmpFromAbsoluteIndex(aliasIndex);
1441
1442	ASSERT_WITH_MESSAGE(!m_spilledTmps.contains(aliasIndex) \|\| alias == tmp, "The aliases at spill should always be colorable. Something went horribly wrong.");
1443
1444	return alias;
1445	}
1446
1447	template<typename IndexIterator>
1448	class IndexToTmpIteratorAdaptor {
1449	public:
1450	IndexToTmpIteratorAdaptor(IndexIterator&& indexIterator)
1451	: m_indexIterator(WTFMove(indexIterator))
1452	{
1453	}
1454
1455	Tmp operator() const* { return TmpMapper::tmpFromAbsoluteIndex(*m_indexIterator); }
1456	IndexToTmpIteratorAdaptor& operator++() { ++m_indexIterator; return *this; }
1457
1458	bool operator==(const IndexToTmpIteratorAdaptor& other) const
1459	{
1460	return m_indexIterator == other.m_indexIterator;
1461	}
1462
1463	bool operator!=(const IndexToTmpIteratorAdaptor& other) const
1464	{
1465	return !(*this == other);
1466	}
1467
1468	private:
1469	IndexIterator m_indexIterator;
1470	};
1471
1472	template<typename Collection>
1473	class IndexToTmpIterableAdaptor {
1474	public:
1475	IndexToTmpIterableAdaptor(const Collection& collection)
1476	: m_collection(collection)
1477	{
1478	}
1479
1480	IndexToTmpIteratorAdaptor<typename Collection::const_iterator> begin() const
1481	{
1482	return m_collection.begin();
1483	}
1484
1485	IndexToTmpIteratorAdaptor<typename Collection::const_iterator> end() const
1486	{
1487	return m_collection.end();
1488	}
1489
1490	private:
1491	const Collection& m_collection;
1492	};
1493
1494	IndexToTmpIterableAdaptor<Vector<unsigned>> spilledTmps() const { return m_spilledTmps; }
1495
1496	bool requiresSpilling() const { return !m_spilledTmps.isEmpty(); }
1497
1498	Reg allocatedReg(Tmp tmp) const
1499	{
1500	ASSERT(!tmp.isReg());
1501	ASSERT(m_coloredTmp.size());
1502	ASSERT(tmp.isGP() == (bank == GP));
1503
1504	Reg reg = m_coloredTmp[TmpMapper::absoluteIndex(tmp)];
1505	if (!reg) {
1506	dataLog("FATAL: No color for ", tmp, "\n");
1507	dataLog("Code:\n");
1508	dataLog(m_code);
1509	RELEASE_ASSERT_NOT_REACHED();
1510	}
1511	return reg;
1512	}
1513
1514	protected:
1515	static unsigned tmpArraySize(Code& code)
1516	{
1517	unsigned numTmps = code.numTmps(bank);
1518	return TmpMapper::absoluteIndex(numTmps);
1519	}
1520
1521	void initializePrecoloredTmp()
1522	{
1523	m_coloredTmp.resize(m_lastPrecoloredRegisterIndex + `1`);
1524	for (unsigned i = `1`; i <= m_lastPrecoloredRegisterIndex; ++i) {
1525	Tmp tmp = TmpMapper::tmpFromAbsoluteIndex(i);
1526	ASSERT(tmp.isReg());
1527	m_coloredTmp[i] = tmp.reg();
1528	}
1529	}
1530
1531	bool mayBeCoalesced(Arg left, Arg right)
1532	{
1533	if (!left.isTmp() \|\| !right.isTmp())
1534	return false;
1535
1536	Tmp leftTmp = left.tmp();
1537	Tmp rightTmp = right.tmp();
1538
1539	if (leftTmp == rightTmp)
1540	return false;
1541
1542	if (leftTmp.isGP() != (bank == GP) \|\| rightTmp.isGP() != (bank == GP))
1543	return false;
1544
1545	unsigned leftIndex = TmpMapper::absoluteIndex(leftTmp);
1546	unsigned rightIndex = TmpMapper::absoluteIndex(rightTmp);
1547
1548	return !hasInterferenceEdge(InterferenceEdge(leftIndex, rightIndex));
1549	}
1550
1551	void addToLowPriorityCoalescingCandidates(Arg left, Arg right)
1552	{
1553	ASSERT(mayBeCoalesced(left, right));
1554	Tmp leftTmp = left.tmp();
1555	Tmp rightTmp = right.tmp();
1556
1557	unsigned leftIndex = TmpMapper::absoluteIndex(leftTmp);
1558	unsigned rightIndex = TmpMapper::absoluteIndex(rightTmp);
1559
1560	unsigned nextMoveIndex = m_coalescingCandidates.size();
1561	m_coalescingCandidates.append({ leftIndex, rightIndex });
1562
1563	unsigned newIndexInWorklist = m_worklistMoves.addLowPriorityMove();
1564	ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex);
1565
1566	m_moveList[leftIndex].add(nextMoveIndex);
1567	m_moveList[rightIndex].add(nextMoveIndex);
1568	}
1569
1570	void build()
1571	{
1572	m_coalescingCandidates.clear();
1573	m_worklistMoves.clear();
1574
1575	// FIXME: It seems like we don't need to recompute liveness. We just need to update its data
1576	// structures so that it knows that the newly introduced temporaries are not live at any basic
1577	// block boundary. This should trivially be true for now.
1578	TmpLiveness<bank> liveness(m_code);
1579	for (BasicBlock* block : m_code) {
1580	typename TmpLiveness<bank>::LocalCalc localCalc(liveness, block);
1581	for (unsigned instIndex = block->size(); instIndex--;) {
1582	Inst& inst = block->at(instIndex);
1583	Inst* nextInst = block->get(instIndex + `1`);
1584	build(&inst, nextInst, localCalc);
1585	localCalc.execute(instIndex);
1586	}
1587	build(nullptr, &block->at(`0`), localCalc);
1588	}
1589	buildLowPriorityMoveList();
1590	}
1591
1592	void build(Inst* prevInst, Inst* nextInst, const typename TmpLiveness<bank>::LocalCalc& localCalc)
1593	{
1594	if (traceDebug)
1595	dataLog("Building between ", pointerDump(prevInst), " and ", pointerDump(nextInst), ":\n");
1596
1597	Inst::forEachDefWithExtraClobberedRegs<Tmp>(
1598	prevInst, nextInst,
1599	[&] (const Tmp& arg, Arg::Role, Bank argBank, Width) {
1600	if (argBank != bank)
1601	return;
1602
1603	// All the Def()s interfere with each other and with all the extra clobbered Tmps.
1604	// We should not use forEachDefWithExtraClobberedRegs() here since colored Tmps
1605	// do not need interference edges in our implementation.
1606	Inst::forEachDef<Tmp>(
1607	prevInst, nextInst,
1608	[&] (Tmp& otherArg, Arg::Role, Bank argBank, Width) {
1609	if (argBank != bank)
1610	return;
1611
1612	if (traceDebug)
1613	dataLog(" Adding def-def edge: ", arg, ", ", otherArg, "\n");
1614	this->addEdge(arg, otherArg);
1615	});
1616	});
1617
1618	if (prevInst && mayBeCoalescable(*prevInst)) {
1619	// We do not want the Use() of this move to interfere with the Def(), even if it is live
1620	// after the Move. If we were to add the interference edge, it would be impossible to
1621	// coalesce the Move even if the two Tmp never interfere anywhere.
1622	Tmp defTmp;
1623	Tmp useTmp;
1624	prevInst->forEachTmp([&defTmp, &useTmp] (Tmp& argTmp, Arg::Role role, Bank, Width) {
1625	if (Arg::isLateDef(role))
1626	defTmp = argTmp;
1627	else {
1628	ASSERT(Arg::isEarlyUse(role));
1629	useTmp = argTmp;
1630	}
1631	});
1632	ASSERT(defTmp);
1633	ASSERT(useTmp);
1634
1635	unsigned nextMoveIndex = m_coalescingCandidates.size();
1636	m_coalescingCandidates.append({ TmpMapper::absoluteIndex(useTmp), TmpMapper::absoluteIndex(defTmp) });
1637	if (traceDebug)
1638	dataLogLn("Move at index ", nextMoveIndex, " is: ", *prevInst);
1639
1640	unsigned newIndexInWorklist = m_worklistMoves.addMove();
1641	ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex);
1642
1643	for (const Arg& arg : prevInst->args) {
1644	auto& list = m_moveList[TmpMapper::absoluteIndex(arg.tmp())];
1645	list.add(nextMoveIndex);
1646	}
1647
1648	auto considerEdge = [&] (const Tmp& liveTmp) {
1649	if (liveTmp != useTmp) {
1650	if (traceDebug)
1651	dataLog(" Adding def-live for coalescable: ", defTmp, ", ", liveTmp, "\n");
1652	addEdge(defTmp, liveTmp);
1653	}
1654	};
1655
1656	for (Tmp liveTmp : localCalc.live())
1657	considerEdge(liveTmp);
1658	for (const Tmp& pinnedRegTmp : m_pinnedRegs)
1659	considerEdge(pinnedRegTmp);
1660
1661	// The next instruction could have early clobbers or early def's. We need to consider
1662	// those now.
1663	addEdges(nullptr, nextInst, localCalc.live());
1664	} else
1665	addEdges(prevInst, nextInst, localCalc.live());
1666	}
1667
1668	void buildLowPriorityMoveList()
1669	{
1670	if (!isX86())
1671	return;
1672
1673	m_worklistMoves.startAddingLowPriorityMoves();
1674	for (BasicBlock* block : m_code) {
1675	for (Inst& inst : *block) {
1676	if (Optional<unsigned> defArgIndex = inst.shouldTryAliasingDef()) {
1677	Arg op1 = inst.args [*defArgIndex - `2`];
1678	Arg op2 = inst.args [*defArgIndex - `1`];
1679	Arg dest = inst.args [*defArgIndex];
1680
1681	if (op1 == dest \|\| op2 == dest)
1682	continue;
1683
1684	if (mayBeCoalesced(op1, dest))
1685	addToLowPriorityCoalescingCandidates(op1, dest);
1686	if (op1 != op2 && mayBeCoalesced(op2, dest))
1687	addToLowPriorityCoalescingCandidates(op2, dest);
1688	}
1689	}
1690	}
1691	}
1692
1693	void addEdges(Inst* prevInst, Inst* nextInst, typename TmpLiveness<bank>::LocalCalc::Iterable liveTmps)
1694	{
1695	// All the Def()s interfere with everthing live.
1696	Inst::forEachDefWithExtraClobberedRegs<Tmp>(
1697	prevInst, nextInst,
1698	[&] (const Tmp& arg, Arg::Role, Bank argBank, Width) {
1699	if (argBank != bank)
1700	return;
1701
1702	for (Tmp liveTmp : liveTmps) {
1703	ASSERT(liveTmp.isGP() == (bank == GP));
1704
1705	if (traceDebug)
1706	dataLog(" Adding def-live edge: ", arg, ", ", liveTmp, "\n");
1707
1708	addEdge(arg, liveTmp);
1709	}
1710	for (const Tmp& pinnedRegTmp : m_pinnedRegs)
1711	addEdge(arg, pinnedRegTmp);
1712	});
1713	}
1714
1715	void addEdge(Tmp a, Tmp b)
1716	{
1717	ASSERT_WITH_MESSAGE(a.isGP() == b.isGP(), "An interference between registers of different types does not make sense, it can lead to non-colorable graphs.");
1718
1719	addEdge(TmpMapper::absoluteIndex(a), TmpMapper::absoluteIndex(b));
1720	}
1721
1722	// Calling this without a tmpWidth will perform a more conservative coalescing analysis that assumes
1723	// that Move32's are not coalescable.
1724	static bool mayBeCoalescableImpl(const Inst& inst, TmpWidth* tmpWidth)
1725	{
1726	switch (bank) {
1727	case GP:
1728	switch (inst.kind.opcode) {
1729	case Move:
1730	case Move32:
1731	break;
1732	default:
1733	return false;
1734	}
1735	break;
1736	case FP:
1737	switch (inst.kind.opcode) {
1738	case MoveFloat:
1739	case MoveDouble:
1740	break;
1741	default:
1742	return false;
1743	}
1744	break;
1745	}
1746
1747	// Avoid the three-argument coalescable spill moves.
1748	if (inst.args.size() != `2`)
1749	return false;
1750
1751	if (!inst.args [`0`].isTmp() \|\| !inst.args [`1`].isTmp())
1752	return false;
1753
1754	ASSERT(inst.args[`0`].bank() == bank);
1755	ASSERT(inst.args[`1`].bank() == bank);
1756
1757	// We can coalesce a Move32 so long as either of the following holds:
1758	// - The input is already zero-filled.
1759	// - The output only cares about the low 32 bits.
1760	//
1761	// Note that the input property requires an analysis over ZDef's, so it's only valid so long
1762	// as the input gets a register. We don't know if the input gets a register, but we do know
1763	// that if it doesn't get a register then we will still emit this Move32.
1764	if (inst.kind.opcode == Move32) {
1765	if (!tmpWidth)
1766	return false;
1767
1768	if (tmpWidth->defWidth(inst.args [`0`].tmp()) > Width32
1769	&& tmpWidth->useWidth(inst.args [`1`].tmp()) > Width32)
1770	return false;
1771	}
1772
1773	return true;
1774	}
1775
1776	TmpWidth& m_tmpWidth;
1777	};
1778
1779	class GraphColoringRegisterAllocation {
1780	public:
1781	GraphColoringRegisterAllocation(Code& code, UseCounts<Tmp>& useCounts)
1782	: m_code(code)
1783	, m_useCounts(useCounts)
1784	{
1785	}
1786
1787	void run()
1788	{
1789	padInterference(m_code);
1790
1791	allocateOnBank<GP>();
1792	m_numIterations = `0`;
1793	allocateOnBank<FP>();
1794
1795	fixSpillsAfterTerminals(m_code);
1796
1797	if (reportStats)
1798	dataLog("Num iterations = ", m_numIterations, "\n");
1799	}
1800
1801	private:
1802	template<Bank bank>
1803	void allocateOnBank()
1804	{
1805	HashSet<unsigned> unspillableTmps = computeUnspillableTmps<bank>();
1806
1807	// FIXME: If a Tmp is used only from a Scratch role and that argument is !admitsStack, then
1808	// we should add the Tmp to unspillableTmps. That will help avoid relooping only to turn the
1809	// Tmp into an unspillable Tmp.
1810	// https://bugs.webkit.org/show_bug.cgi?id=152699
1811
1812	while (true) {
1813	++m_numIterations;
1814
1815	if (traceDebug)
1816	dataLog("Code at iteration ", m_numIterations, ":\n", m_code);
1817
1818	// FIXME: One way to optimize this code is to remove the recomputation inside the fixpoint.
1819	// We need to recompute because spilling adds tmps, but we could just update tmpWidth when we
1820	// add those tmps. Note that one easy way to remove the recomputation is to make any newly
1821	// added Tmps get the same use/def widths that the original Tmp got. But, this may hurt the
1822	// spill code we emit. Since we currently recompute TmpWidth after spilling, the newly
1823	// created Tmps may get narrower use/def widths. On the other hand, the spiller already
1824	// selects which move instruction to use based on the original Tmp's widths, so it may not
1825	// matter than a subsequent iteration sees a coservative width for the new Tmps. Also, the
1826	// recomputation may not actually be a performance problem; it's likely that a better way to
1827	// improve performance of TmpWidth is to replace its HashMap with something else. It's
1828	// possible that most of the TmpWidth overhead is from queries of TmpWidth rather than the
1829	// recomputation, in which case speeding up the lookup would be a bigger win.
1830	// https://bugs.webkit.org/show_bug.cgi?id=152478
1831	m_tmpWidth.recompute(m_code);
1832
1833	auto doAllocation = [&] (auto& allocator) -> bool {
1834	allocator.allocate();
1835	if (!allocator.requiresSpilling()) {
1836	this->assignRegistersToTmp<bank>(allocator);
1837	if (traceDebug)
1838	dataLog("Successfull allocation at iteration ", m_numIterations, ":\n", m_code);
1839
1840	return true;
1841	}
1842
1843	this->addSpillAndFill<bank>(allocator, unspillableTmps);
1844	return false;
1845	};
1846
1847	bool done;
1848	if (useIRC()) {
1849	ColoringAllocator<bank, IRC> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps);
1850	done = doAllocation(allocator);
1851	} else {
1852	ColoringAllocator<bank, Briggs> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps);
1853	done = doAllocation(allocator);
1854	}
1855	if (done)
1856	return;
1857	}
1858	}
1859
1860	template<Bank bank>
1861	HashSet<unsigned> computeUnspillableTmps()
1862	{
1863
1864	HashSet<unsigned> unspillableTmps;
1865
1866	struct Range {
1867	unsigned first { std::numeric_limits<unsigned>::max() };
1868	unsigned last { `0` };
1869	unsigned count { `0` };
1870	unsigned admitStackCount { `0` };
1871	};
1872
1873	unsigned numTmps = m_code.numTmps(bank);
1874	unsigned arraySize = AbsoluteTmpMapper<bank>::absoluteIndex(numTmps);
1875
1876	Vector<Range, `0`, UnsafeVectorOverflow> ranges;
1877	ranges.fill(Range(), arraySize);
1878
1879	unsigned globalIndex = `0`;
1880	for (BasicBlock* block : m_code) {
1881	for (Inst& inst : *block) {
1882	inst.forEachArg([&] (Arg& arg, Arg::Role, Bank argBank, Width) {
1883	if (arg.isTmp() && inst.admitsStack(arg)) {
1884	if (argBank != bank)
1885	return;
1886
1887	Tmp tmp = arg.tmp();
1888	Range& range = ranges[AbsoluteTmpMapper<bank>::absoluteIndex(tmp)];
1889	range.count++;
1890	range.admitStackCount++;
1891	if (globalIndex < range.first) {
1892	range.first = globalIndex;
1893	range.last = globalIndex;
1894	} else
1895	range.last = globalIndex;
1896
1897	return;
1898	}
1899
1900	arg.forEachTmpFast([&] (Tmp& tmp) {
1901	if (tmp.isGP() != (bank == GP))
1902	return;
1903
1904	Range& range = ranges[AbsoluteTmpMapper<bank>::absoluteIndex(tmp)];
1905	range.count++;
1906	if (globalIndex < range.first) {
1907	range.first = globalIndex;
1908	range.last = globalIndex;
1909	} else
1910	range.last = globalIndex;
1911	});
1912	});
1913
1914	++globalIndex;
1915	}
1916	++globalIndex;
1917	}
1918	for (unsigned i = AbsoluteTmpMapper<bank>::lastMachineRegisterIndex() + `1`; i < ranges.size(); ++i) {
1919	Range& range = ranges[i];
1920	if (range.last - range.first <= `1` && range.count > range.admitStackCount)
1921	unspillableTmps.add(i);
1922	}
1923
1924	return unspillableTmps;
1925	}
1926
1927	template<Bank bank, typename AllocatorType>
1928	void assignRegistersToTmp(const AllocatorType& allocator)
1929	{
1930	for (BasicBlock* block : m_code) {
1931	// Give Tmp a valid register.
1932	for (unsigned instIndex = `0`; instIndex < block->size(); ++instIndex) {
1933	Inst& inst = block->at(instIndex);
1934
1935	// The mayBeCoalescable() method will change its mind for some operations after we
1936	// complete register allocation. So, we record this before starting.
1937	bool mayBeCoalescable = allocator.mayBeCoalescable(inst);
1938
1939	// Move32 is cheaper if we know that it's equivalent to a Move. It's
1940	// equivalent if the destination's high bits are not observable or if the source's high
1941	// bits are all zero. Note that we don't have the opposite optimization for other
1942	// architectures, which may prefer Move over Move32, because Move is canonical already.
1943	if (bank == GP && inst.kind.opcode == Move
1944	&& inst.args [`0`].isTmp() && inst.args [`1`].isTmp()) {
1945	if (m_tmpWidth.useWidth(inst.args [`1`].tmp()) <= Width32
1946	\|\| m_tmpWidth.defWidth(inst.args [`0`].tmp()) <= Width32)
1947	inst.kind.opcode = Move32;
1948	}
1949
1950	inst.forEachTmpFast([&] (Tmp& tmp) {
1951	if (tmp.isReg() \|\| tmp.bank() != bank)
1952	return;
1953
1954	Tmp aliasTmp = allocator.getAlias(tmp);
1955	Tmp assignedTmp;
1956	if (aliasTmp.isReg())
1957	assignedTmp = Tmp (aliasTmp.reg());
1958	else {
1959	auto reg = allocator.allocatedReg(aliasTmp);
1960	ASSERT(reg);
1961	assignedTmp = Tmp(reg);
1962	}
1963	ASSERT(assignedTmp.isReg());
1964	tmp = assignedTmp;
1965	});
1966
1967	if (mayBeCoalescable && inst.args [`0`].isTmp() && inst.args [`1`].isTmp()
1968	&& inst.args [`0`].tmp() == inst.args [`1`].tmp())
1969	inst = Inst ();
1970	}
1971
1972	// Remove all the useless moves we created in this block.
1973	block->insts().removeAllMatching([&] (const Inst& inst) {
1974	return !inst;
1975	});
1976	}
1977	}
1978
1979	static unsigned stackSlotMinimumWidth(Width width)
1980	{
1981	return width <= Width32 ? `4` : `8`;
1982	}
1983
1984	template<Bank bank, typename AllocatorType>
1985	void addSpillAndFill(const AllocatorType& allocator, HashSet<unsigned>& unspillableTmps)
1986	{
1987	HashMap<Tmp, StackSlot*> stackSlots;
1988	for (Tmp tmp : allocator.spilledTmps()) {
1989	// All the spilled values become unspillable.
1990	unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp));
1991
1992	// Allocate stack slot for each spilled value.
1993	StackSlot* stackSlot = m_code.addStackSlot(
1994	stackSlotMinimumWidth(m_tmpWidth.requiredWidth(tmp)), StackSlotKind::Spill);
1995	bool isNewTmp = stackSlots.add(tmp, stackSlot).isNewEntry;
1996	ASSERT_UNUSED(isNewTmp, isNewTmp);
1997	}
1998
1999	// Rewrite the program to get rid of the spilled Tmp.
2000	InsertionSet insertionSet(m_code);
2001	for (BasicBlock* block : m_code) {
2002	bool hasAliasedTmps = false;
2003
2004	for (unsigned instIndex = `0`; instIndex < block->size(); ++instIndex) {
2005	Inst& inst = block->at(instIndex);
2006
2007	// The TmpWidth analysis will say that a Move only stores 32 bits into the destination,
2008	// if the source only had 32 bits worth of non-zero bits. Same for the source: it will
2009	// only claim to read 32 bits from the source if only 32 bits of the destination are
2010	// read. Note that we only apply this logic if this turns into a load or store, since
2011	// Move is the canonical way to move data between GPRs.
2012	bool canUseMove32IfDidSpill = false;
2013	bool didSpill = false;
2014	bool needScratch = false;
2015	if (bank == GP && inst.kind.opcode == Move) {
2016	if ((inst.args [`0`].isTmp() && m_tmpWidth.width(inst.args [`0`].tmp()) <= Width32)
2017	\|\| (inst.args [`1`].isTmp() && m_tmpWidth.width(inst.args [`1`].tmp()) <= Width32))
2018	canUseMove32IfDidSpill = true;
2019	}
2020
2021	// Try to replace the register use by memory use when possible.
2022	inst.forEachArg(
2023	[&] (Arg& arg, Arg::Role role, Bank argBank, Width width) {
2024	if (!arg.isTmp())
2025	return;
2026	if (argBank != bank)
2027	return;
2028	if (arg.isReg())
2029	return;
2030
2031	auto stackSlotEntry = stackSlots.find(arg.tmp());
2032	if (stackSlotEntry == stackSlots.end())
2033	return;
2034	bool needScratchIfSpilledInPlace = false;
2035	if (!inst.admitsStack(arg)) {
2036	if (traceDebug)
2037	dataLog("Have an inst that won't admit stack: ", inst, "\n");
2038	switch (inst.kind.opcode) {
2039	case Move:
2040	case MoveDouble:
2041	case MoveFloat:
2042	case Move32: {
2043	unsigned argIndex = &arg - &inst.args [`0`];
2044	unsigned otherArgIndex = argIndex ^ `1`;
2045	Arg otherArg = inst.args [otherArgIndex];
2046	if (inst.args.size() == `2`
2047	&& otherArg.isStack()
2048	&& otherArg.stackSlot()->isSpill()) {
2049	needScratchIfSpilledInPlace = true;
2050	break;
2051	}
2052	return;
2053	}
2054	default:
2055	return;
2056	}
2057	}
2058
2059	// If the Tmp holds a constant then we want to rematerialize its
2060	// value rather than loading it from the stack. In order for that
2061	// optimization to kick in, we need to avoid placing the Tmp's stack
2062	// address into the instruction.
2063	if (!Arg::isColdUse(role)) {
2064	const UseCounts<Tmp>::Counts* counts = m_useCounts [arg.tmp()];
2065	if (counts && counts->numConstDefs == `1` && counts->numDefs == `1`)
2066	return;
2067	}
2068
2069	Width spillWidth = m_tmpWidth.requiredWidth(arg.tmp());
2070	if (Arg::isAnyDef(role) && width < spillWidth) {
2071	// Either there are users of this tmp who will use more than width,
2072	// or there are producers who will produce more than width non-zero
2073	// bits.
2074	// FIXME: It's not clear why we should have to return here. We have
2075	// a ZDef fixup in allocateStack. And if this isn't a ZDef, then it
2076	// doesn't seem like it matters what happens to the high bits. Note
2077	// that this isn't the case where we're storing more than what the
2078	// spill slot can hold - we already got that covered because we
2079	// stretch the spill slot on demand. One possibility is that it's ZDefs of
2080	// smaller width than 32-bit.
2081	// https://bugs.webkit.org/show_bug.cgi?id=169823
2082	return;
2083	}
2084	ASSERT(inst.kind.opcode == Move \|\| !(Arg::isAnyUse(role) && width > spillWidth));
2085
2086	if (spillWidth != Width32)
2087	canUseMove32IfDidSpill = false;
2088
2089	stackSlotEntry ->value->ensureSize(
2090	canUseMove32IfDidSpill ? `4` : bytes(width));
2091	arg = Arg::stack(stackSlotEntry ->value);
2092	didSpill = true;
2093	if (needScratchIfSpilledInPlace)
2094	needScratch = true;
2095	});
2096
2097	if (didSpill && canUseMove32IfDidSpill)
2098	inst.kind.opcode = Move32;
2099
2100	if (needScratch) {
2101	Bank instBank;
2102	switch (inst.kind.opcode) {
2103	case Move:
2104	case Move32:
2105	instBank = GP;
2106	break;
2107	case MoveDouble:
2108	case MoveFloat:
2109	instBank = FP;
2110	break;
2111	default:
2112	RELEASE_ASSERT_NOT_REACHED();
2113	instBank = GP;
2114	break;
2115	}
2116
2117	RELEASE_ASSERT(instBank == bank);
2118
2119	Tmp tmp = m_code.newTmp(bank);
2120	unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp));
2121	inst.args.append(tmp);
2122	RELEASE_ASSERT(inst.args.size() == `3`);
2123
2124	// Without this, a chain of spill moves would need two registers, not one, because
2125	// the scratch registers of successive moves would appear to interfere with each
2126	// other. As well, we need this if the previous instruction had any late effects,
2127	// since otherwise the scratch would appear to interfere with those. On the other
2128	// hand, the late use added at the end of this spill move (previously it was just a
2129	// late def) doesn't change the padding situation.: the late def would have already
2130	// caused it to report hasLateUseOrDef in Inst::needsPadding.
2131	insertionSet.insert(instIndex, Nop, inst.origin);
2132	continue;
2133	}
2134
2135	// For every other case, add Load/Store as needed.
2136	inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank argBank, Width) {
2137	if (tmp.isReg() \|\| argBank != bank)
2138	return;
2139
2140	auto stackSlotEntry = stackSlots.find(tmp);
2141	if (stackSlotEntry == stackSlots.end()) {
2142	Tmp alias = allocator.getAliasWhenSpilling(tmp);
2143	if (alias != tmp) {
2144	tmp = alias;
2145	hasAliasedTmps = true;
2146	}
2147	return;
2148	}
2149
2150	Width spillWidth = m_tmpWidth.requiredWidth(tmp);
2151	Opcode move = Oops;
2152	switch (stackSlotMinimumWidth(spillWidth)) {
2153	case `4`:
2154	move = bank == GP ? Move32 : MoveFloat;
2155	break;
2156	case `8`:
2157	move = bank == GP ? Move : MoveDouble;
2158	break;
2159	default:
2160	RELEASE_ASSERT_NOT_REACHED();
2161	break;
2162	}
2163
2164	tmp = m_code.newTmp(bank);
2165	unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp));
2166
2167	if (role == Arg::Scratch)
2168	return;
2169
2170	Arg arg = Arg::stack(stackSlotEntry ->value);
2171	if (Arg::isAnyUse(role))
2172	insertionSet.insert(instIndex, move, inst.origin, arg, tmp);
2173	if (Arg::isAnyDef(role))
2174	insertionSet.insert(instIndex + `1`, move, inst.origin, tmp, arg);
2175	});
2176	}
2177	insertionSet.execute(block);
2178
2179	if (hasAliasedTmps) {
2180	block->insts().removeAllMatching([&] (const Inst& inst) {
2181	return allocator.isUselessMove(inst);
2182	});
2183	}
2184	}
2185	}
2186
2187	Code& m_code;
2188	TmpWidth m_tmpWidth;
2189	UseCounts<Tmp>& m_useCounts;
2190	unsigned m_numIterations { `0` };
2191	};
2192
2193	} // anonymous namespace
2194
2195	void allocateRegistersByGraphColoring(Code& code)
2196	{
2197	PhaseScope phaseScope(code, "allocateRegistersByGraphColoring");
2198
2199	if (false)
2200	dataLog("Code before graph coloring:\n", code);
2201
2202	UseCounts<Tmp> useCounts(code);
2203	GraphColoringRegisterAllocation graphColoringRegisterAllocation(code, useCounts);
2204	graphColoringRegisterAllocation.run();
2205	}
2206
2207	} } } // namespace JSC::B3::Air
2208
2209	#endif // ENABLE(B3_JIT)
2210

Browse the source code of webkit/Source/JavaScriptCore/b3/air/AirAllocateRegistersByGraphColoring.cpp