/*
 * Copyright (C) 2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirAllocateRegistersAndStackByLinearScan.h"

#if ENABLE(B3_JIT)

#include "AirArgInlines.h"
#include "AirCode.h"
#include "AirFixSpillsAfterTerminals.h"
#include "AirHandleCalleeSaves.h"
#include "AirInstInlines.h"
#include "AirLiveness.h"
#include "AirPadInterference.h"
#include "AirPhaseInsertionSet.h"
#include "AirPhaseScope.h"
#include "AirRegLiveness.h"
#include "AirStackAllocation.h"
#include "AirTmpInlines.h"
#include "AirTmpMap.h"
#include <wtf/ListDump.h>
#include <wtf/Range.h>

namespace JSC { namespace B3 { namespace Air {

namespace {

NO_RETURN_DUE_TO_CRASH NEVER_INLINE void crash()
{
    CRASH();
}

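// Redefine RELEASE_ASSERT so that a failure funnels through the NEVER_INLINE crash() above,
// presumably to keep the code at each assertion site small, which matters in a phase whose
// whole point is to be fast.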
#undef RELEASE_ASSERT
#define RELEASE_ASSERT(assertion) do { \
    if (!(assertion)) { \
        WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, #assertion); \
        crash(); \
    } \
} while (0)

bool verbose() { return Options::airLinearScanVerbose(); }

// Phase constants we use for the PhaseInsertionSet.
const unsigned firstPhase = 0;
const unsigned secondPhase = 1;
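// Spill stores for instruction N and spill reloads for instruction N + 1 land at the same
// insertion index; emitting the stores in firstPhase and the reloads in secondPhase keeps
// them correctly ordered.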
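// Each instruction occupies two consecutive indices in the program numbering: an early index,
// where its uses happen, and a late index, where its defs happen. An Interval is a half-open
// range over this numbering.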
typedef Range<size_t> Interval;

struct TmpData {
    void dump(PrintStream& out) const
    {
        out.print("{interval = ", interval, ", spilled = ", pointerDump(spilled), ", assigned = ", assigned, ", isUnspillable = ", isUnspillable, ", possibleRegs = ", possibleRegs, ", didBuildPossibleRegs = ", didBuildPossibleRegs, "}");
    }

    void validate()
    {
        RELEASE_ASSERT(!(spilled && assigned));
    }

    Interval interval;
    StackSlot* spilled { nullptr };
    RegisterSet possibleRegs;
    Reg assigned;
    bool isUnspillable { false };
    bool didBuildPossibleRegs { false };
    unsigned spillIndex { 0 };
};

struct Clobber {
    Clobber()
    {
    }

    Clobber(size_t index, RegisterSet regs)
        : index(index)
        , regs(regs)
    {
    }

    void dump(PrintStream& out) const
    {
        out.print(index, ":", regs);
    }

    size_t index { 0 };
    RegisterSet regs;
};

class LinearScan {
public:
    LinearScan(Code& code)
        : m_code(code)
        , m_startIndex(code.size())
        , m_map(code)
        , m_insertionSets(code.size())
    {
    }

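    // Overview: number the instructions, build a live interval for every tmp, and then
    // repeatedly scan for registers, spilling and retrying until every tmp in every bank has
    // either a register or a stack slot. A second, much simpler scan then packs the spill
    // slots into the frame.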
    void run()
    {
        padInterference(m_code);
        buildRegisterSet();
        buildIndices();
        buildIntervals();
        if (shouldSpillEverything()) {
            spillEverything();
            emitSpillCode();
        }
        for (;;) {
            prepareIntervalsForScanForRegisters();
            m_didSpill = false;
            forEachBank(
                [&] (Bank bank) {
                    attemptScanForRegisters(bank);
                });
            if (!m_didSpill)
                break;
            emitSpillCode();
        }
        insertSpillCode();
        assignRegisters();
        fixSpillsAfterTerminals(m_code);

        handleCalleeSaves(m_code);
        allocateEscapedStackSlots(m_code);
        prepareIntervalsForScanForStack();
        scanForStack();
        updateFrameSizeBasedOnStackSlots(m_code);
        m_code.setStackIsAllocated(true);
    }

private:
    void buildRegisterSet()
    {
        forEachBank(
            [&] (Bank bank) {
                m_registers[bank] = m_code.regsInPriorityOrder(bank);
                m_registerSet[bank].setAll(m_registers[bank]);
                m_unifiedRegisterSet.merge(m_registerSet[bank]);
            });
    }

    void buildIndices()
    {
        size_t index = 0;
        for (BasicBlock* block : m_code) {
            m_startIndex[block] = index;
            index += block->size() * 2;
        }
    }

    size_t indexOfHead(BasicBlock* block)
    {
        return m_startIndex[block];
    }

    size_t indexOfTail(BasicBlock* block)
    {
        return indexOfHead(block) + block->size() * 2;
    }

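    // Map an instruction's early index and an argument's timing onto an Interval. Note that
    // WTF::Range's one-argument constructor denotes the single-index range [i, i + 1).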
    Interval interval(size_t indexOfEarly, Arg::Timing timing)
    {
        switch (timing) {
        case Arg::OnlyEarly:
            return Interval(indexOfEarly);
        case Arg::OnlyLate:
            return Interval(indexOfEarly + 1);
        case Arg::EarlyAndLate:
            return Interval(indexOfEarly, indexOfEarly + 2);
        }
        ASSERT_NOT_REACHED();
        return Interval();
    }

    void buildIntervals()
    {
        TimingScope timingScope("LinearScan::buildIntervals");
        UnifiedTmpLiveness liveness(m_code);

        for (BasicBlock* block : m_code) {
            size_t indexOfHead = this->indexOfHead(block);
            size_t indexOfTail = this->indexOfTail(block);
            if (verbose()) {
                dataLog("At block ", pointerDump(block), "\n");
                dataLog(" indexOfHead = ", indexOfHead, "\n");
                dataLog(" indexOfTail = ", indexOfTail, "\n");
            }
            for (Tmp tmp : liveness.liveAtHead(block)) {
                if (!tmp.isReg())
                    m_map[tmp].interval |= Interval(indexOfHead);
            }
            for (Tmp tmp : liveness.liveAtTail(block)) {
                if (!tmp.isReg())
                    m_map[tmp].interval |= Interval(indexOfTail);
            }

            for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
                Inst& inst = block->at(instIndex);
                size_t indexOfEarly = indexOfHead + instIndex * 2;
                // FIXME: We can get this information from the liveness constraints. Except of
                // course we want to separate the earlies of one instruction from the lates of
                // the next.
                // https://bugs.webkit.org/show_bug.cgi?id=170850
                inst.forEachTmp(
                    [&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                        if (tmp.isReg())
                            return;
                        m_map[tmp].interval |= interval(indexOfEarly, Arg::timing(role));
                    });
            }

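            // Walk the block backwards, recording for each boundary between instructions the
            // set of registers that are live or clobbered there. The scan later uses these
            // Clobbers to restrict which registers a tmp may be assigned.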
            RegLiveness::LocalCalcForUnifiedTmpLiveness localCalc(liveness, block);

            auto record = [&] (unsigned instIndex) {
                // FIXME: This could get the register sets from somewhere else, like the
                // liveness constraints. Except we want those constraints to separate the late
                // actions of one instruction from the early actions of the next.
                // https://bugs.webkit.org/show_bug.cgi?id=170850
                const RegisterSet& regs = localCalc.live();
                if (Inst* prev = block->get(instIndex - 1)) {
                    RegisterSet prevRegs = regs;
                    prev->forEach<Reg>(
                        [&] (Reg& reg, Arg::Role role, Bank, Width) {
                            if (Arg::isLateDef(role))
                                prevRegs.add(reg);
                        });
                    if (prev->kind.opcode == Patch)
                        prevRegs.merge(prev->extraClobberedRegs());
                    prevRegs.filter(m_unifiedRegisterSet);
                    if (!prevRegs.isEmpty())
                        m_clobbers.append(Clobber(indexOfHead + instIndex * 2 - 1, prevRegs));
                }
                if (Inst* next = block->get(instIndex)) {
                    RegisterSet nextRegs = regs;
                    next->forEach<Reg>(
                        [&] (Reg& reg, Arg::Role role, Bank, Width) {
                            if (Arg::isEarlyDef(role))
                                nextRegs.add(reg);
                        });
                    if (next->kind.opcode == Patch)
                        nextRegs.merge(next->extraEarlyClobberedRegs());
                    if (!nextRegs.isEmpty())
                        m_clobbers.append(Clobber(indexOfHead + instIndex * 2, nextRegs));
                }
            };

            record(block->size());
            for (unsigned instIndex = block->size(); instIndex--;) {
                localCalc.execute(instIndex);
                record(instIndex);
            }
        }

        std::sort(
            m_clobbers.begin(), m_clobbers.end(),
            [] (Clobber& a, Clobber& b) -> bool {
                return a.index < b.index;
            });

        if (verbose()) {
            dataLog("Intervals:\n");
            m_code.forEachTmp(
                [&] (Tmp tmp) {
                    dataLog(" ", tmp, ": ", m_map[tmp], "\n");
                });
            dataLog("Clobbers: ", listDump(m_clobbers), "\n");
        }
    }

    bool shouldSpillEverything()
    {
        if (!Options::airLinearScanSpillsEverything())
            return false;

        // You're meant to hack this so that you selectively spill everything depending on
        // whatever condition you're investigating. That's super useful for debugging.

        return true;
    }

    void spillEverything()
    {
        m_code.forEachTmp(
            [&] (Tmp tmp) {
                spill(tmp);
            });
    }

    void prepareIntervalsForScanForRegisters()
    {
        prepareIntervals(
            [&] (TmpData& data) -> bool {
                if (data.spilled)
                    return false;

                data.assigned = Reg();
                return true;
            });
    }

    void prepareIntervalsForScanForStack()
    {
        prepareIntervals(
            [&] (TmpData& data) -> bool {
                return data.spilled;
            });
    }

    template<typename SelectFunc>
    void prepareIntervals(const SelectFunc& selectFunc)
    {
        m_tmps.shrink(0);

        m_code.forEachTmp(
            [&] (Tmp tmp) {
                TmpData& data = m_map[tmp];
                if (!selectFunc(data))
                    return;

                m_tmps.append(tmp);
            });

        std::sort(
            m_tmps.begin(), m_tmps.end(),
            [&] (Tmp& a, Tmp& b) {
                return m_map[a].interval.begin() < m_map[b].interval.begin();
            });

        if (verbose())
            dataLog("Tmps: ", listDump(m_tmps), "\n");
    }

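    // Create a short-lived tmp to stand in for a spilled tmp around a single instruction. It's
    // marked unspillable: its interval is so short that the register scan should always manage
    // to assign it a register, and spilling it again would accomplish nothing.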
    Tmp addSpillTmpWithInterval(Bank bank, Interval interval)
    {
        TmpData data;
        data.interval = interval;
        data.isUnspillable = true;

        Tmp tmp = m_code.newTmp(bank);
        m_map.append(tmp, data);
        return tmp;
    }

    void attemptScanForRegisters(Bank bank)
    {
        // This is modeled after LinearScanRegisterAllocation in Fig. 1 in
        // http://dl.acm.org/citation.cfm?id=330250.

        m_active.clear();
        m_activeRegs = { };

        size_t clobberIndex = 0;
        for (Tmp& tmp : m_tmps) {
            if (tmp.bank() != bank)
                continue;

            TmpData& entry = m_map[tmp];
            size_t index = entry.interval.begin();

            if (verbose()) {
                dataLog("Index #", index, ": ", tmp, "\n");
                dataLog(" ", tmp, ": ", entry, "\n");
                dataLog(" clobberIndex = ", clobberIndex, "\n");
                // This could be so much faster.
                BasicBlock* block = m_code[0];
                for (BasicBlock* candidateBlock : m_code) {
                    if (m_startIndex[candidateBlock] > index)
                        break;
                    block = candidateBlock;
                }
                unsigned instIndex = (index - m_startIndex[block] + 1) / 2;
                dataLog(" At: ", pointerDump(block), ", instIndex = ", instIndex, "\n");
                dataLog(" Prev: ", pointerDump(block->get(instIndex - 1)), "\n");
                dataLog(" Next: ", pointerDump(block->get(instIndex)), "\n");
                dataLog(" Active:\n");
                for (Tmp tmp : m_active)
                    dataLog(" ", tmp, ": ", m_map[tmp], "\n");
            }

            // This is ExpireOldIntervals in Fig. 1.
            while (!m_active.isEmpty()) {
                Tmp tmp = m_active.first();
                TmpData& entry = m_map[tmp];

                bool expired = entry.interval.end() <= index;

                if (!expired)
                    break;

                m_active.removeFirst();
                m_activeRegs.remove(entry.assigned);
            }

            // If necessary, compute the set of registers that this tmp could even use. This is
            // not part of Fig. 1, but it's a technique that the authors claim to have
            // implemented in one of their two implementations. There may be other more
            // efficient ways to do this, but this implementation gets some perf wins from
            // piggy-backing this calculation in the scan.
            //
            // Note that the didBuild flag sticks through spilling. Spilling doesn't change the
            // interference situation.
            //
            // Note that we could short-circuit this if we're dealing with a spillable tmp,
            // there are no free registers, and this register's interval ends after the one on
            // the top of the active stack.
            if (!entry.didBuildPossibleRegs) {
                // Advance the clobber index until it's at a clobber that is relevant to us.
                while (clobberIndex < m_clobbers.size() && m_clobbers[clobberIndex].index < index)
                    clobberIndex++;

                RegisterSet possibleRegs = m_registerSet[bank];
                for (size_t i = clobberIndex; i < m_clobbers.size() && m_clobbers[i].index < entry.interval.end(); ++i)
                    possibleRegs.exclude(m_clobbers[i].regs);

                entry.possibleRegs = possibleRegs;
                entry.didBuildPossibleRegs = true;
            }

            if (verbose())
                dataLog(" Possible regs: ", entry.possibleRegs, "\n");

            // Find a free register that we are allowed to use.
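            // If every register in this bank is already active, there is nothing free to
            // find, so skip straight to spilling.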
            if (m_active.size() != m_registers[bank].size()) {
                bool didAssign = false;
                for (Reg reg : m_registers[bank]) {
                    // FIXME: Could do priority coloring here.
                    // https://bugs.webkit.org/show_bug.cgi?id=170304
                    if (!m_activeRegs.contains(reg) && entry.possibleRegs.contains(reg)) {
                        assign(tmp, reg);
                        didAssign = true;
                        break;
                    }
                }
                if (didAssign)
                    continue;
            }

            // This is SpillAtInterval in Fig. 1, but modified to handle clobbers.
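            // We can't always evict the interval that ends furthest away, because its
            // register may not be in this tmp's possibleRegs. Instead, take the latest-ending
            // active tmp whose assigned register we could actually inherit.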
            Tmp spillTmp = m_active.takeLast(
                [&] (Tmp spillCandidate) -> bool {
                    return entry.possibleRegs.contains(m_map[spillCandidate].assigned);
                });
            if (!spillTmp) {
                spill(tmp);
                continue;
            }
            TmpData& spillEntry = m_map[spillTmp];
            RELEASE_ASSERT(spillEntry.assigned);
            if (spillEntry.isUnspillable
                || (!entry.isUnspillable && spillEntry.interval.end() <= entry.interval.end())) {
                spill(tmp);
                addToActive(spillTmp);
                continue;
            }

            assign(tmp, spillEntry.assigned);
            spill(spillTmp);
        }
    }

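    // Keep m_active sorted by increasing interval end, so that ExpireOldIntervals can pop
    // from the front and SpillAtInterval can take from the back. Unspillable tmps have very
    // short intervals, so prepending them keeps them out of takeLast's way.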
    void addToActive(Tmp tmp)
    {
        if (m_map[tmp].isUnspillable) {
            m_active.prepend(tmp);
            return;
        }

        m_active.appendAndBubble(
            tmp,
            [&] (Tmp otherTmp) -> bool {
                TmpData& otherEntry = m_map[otherTmp];
                if (otherEntry.isUnspillable)
                    return false;
                return otherEntry.interval.end() > m_map[tmp].interval.end();
            });
    }

    void assign(Tmp tmp, Reg reg)
    {
        TmpData& entry = m_map[tmp];
        RELEASE_ASSERT(!entry.spilled);
        entry.assigned = reg;
        m_activeRegs.add(reg);
        addToActive(tmp);
    }

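    // Give the tmp a fresh 8-byte Spill slot and drop any register assignment. The loads and
    // stores that implement the spill are produced later, by emitSpillCode().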
    void spill(Tmp tmp)
    {
        TmpData& entry = m_map[tmp];
        RELEASE_ASSERT(!entry.isUnspillable);
        entry.spilled = m_code.addStackSlot(8, StackSlotKind::Spill);
        entry.assigned = Reg();
        m_didSpill = true;
    }

    void emitSpillCode()
    {
        for (BasicBlock* block : m_code) {
            size_t indexOfHead = this->indexOfHead(block);
            for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
                Inst& inst = block->at(instIndex);
                unsigned indexOfEarly = indexOfHead + instIndex * 2;

                // First try to spill directly.
                for (unsigned i = 0; i < inst.args.size(); ++i) {
                    Arg& arg = inst.args[i];
                    if (!arg.isTmp())
                        continue;
                    if (arg.isReg())
                        continue;
                    StackSlot* spilled = m_map[arg.tmp()].spilled;
                    if (!spilled)
                        continue;
                    if (!inst.admitsStack(i))
                        continue;
                    arg = Arg::stack(spilled);
                }

                // Fall back on the hard way.
                inst.forEachTmp(
                    [&] (Tmp& tmp, Arg::Role role, Bank bank, Width) {
                        if (tmp.isReg())
                            return;
                        StackSlot* spilled = m_map[tmp].spilled;
                        if (!spilled)
                            return;
                        Opcode move = bank == GP ? Move : MoveDouble;
                        tmp = addSpillTmpWithInterval(bank, interval(indexOfEarly, Arg::timing(role)));
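                        // A Scratch role wants a register but carries no value in or out, so
                        // the fresh tmp needs no load or store.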
                        if (role == Arg::Scratch)
                            return;
                        if (Arg::isAnyUse(role))
                            m_insertionSets[block].insert(instIndex, secondPhase, move, inst.origin, Arg::stack(spilled), tmp);
                        if (Arg::isAnyDef(role))
                            m_insertionSets[block].insert(instIndex + 1, firstPhase, move, inst.origin, tmp, Arg::stack(spilled));
                    });
            }
        }
    }

    void scanForStack()
    {
        // This is loosely modeled after LinearScanRegisterAllocation in Fig. 1 in
        // http://dl.acm.org/citation.cfm?id=330250.

        m_active.clear();
        m_usedSpillSlots.clearAll();

        for (Tmp& tmp : m_tmps) {
            TmpData& entry = m_map[tmp];
            if (!entry.spilled)
                continue;

            size_t index = entry.interval.begin();

            // This is ExpireOldIntervals in Fig. 1.
            while (!m_active.isEmpty()) {
                Tmp tmp = m_active.first();
                TmpData& entry = m_map[tmp];

                bool expired = entry.interval.end() <= index;

                if (!expired)
                    break;

                m_active.removeFirst();
                m_usedSpillSlots.clear(entry.spillIndex);
            }

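            // Take the lowest free slot index; spilled tmps whose intervals don't overlap end
            // up sharing a slot. Slots are laid out downward from the current frame, 8 bytes
            // apiece.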
            entry.spillIndex = m_usedSpillSlots.findBit(0, false);
            ptrdiff_t offset = -static_cast<ptrdiff_t>(m_code.frameSize()) - static_cast<ptrdiff_t>(entry.spillIndex) * 8 - 8;
            if (verbose())
                dataLog(" Assigning offset = ", offset, " to spill ", pointerDump(entry.spilled), " for ", tmp, "\n");
            entry.spilled->setOffsetFromFP(offset);
            m_usedSpillSlots.set(entry.spillIndex);
            m_active.append(tmp);
        }
    }

    void insertSpillCode()
    {
        for (BasicBlock* block : m_code)
            m_insertionSets[block].execute(block);
    }

    void assignRegisters()
    {
        if (verbose()) {
            dataLog("About to allocate registers. State of all tmps:\n");
            m_code.forEachTmp(
                [&] (Tmp tmp) {
                    dataLog(" ", tmp, ": ", m_map[tmp], "\n");
                });
            dataLog("IR:\n");
            dataLog(m_code);
        }

        for (BasicBlock* block : m_code) {
            for (Inst& inst : *block) {
                if (verbose())
                    dataLog("At: ", inst, "\n");
                inst.forEachTmpFast(
                    [&] (Tmp& tmp) {
                        if (tmp.isReg())
                            return;

                        Reg reg = m_map[tmp].assigned;
                        if (!reg) {
                            dataLog("Failed to allocate reg for: ", tmp, "\n");
                            RELEASE_ASSERT_NOT_REACHED();
                        }
                        tmp = Tmp(reg);
                    });
            }
        }
    }

    Code& m_code;
    Vector<Reg> m_registers[numBanks];
    RegisterSet m_registerSet[numBanks];
    RegisterSet m_unifiedRegisterSet;
    IndexMap<BasicBlock*, size_t> m_startIndex;
    TmpMap<TmpData> m_map;
    IndexMap<BasicBlock*, PhaseInsertionSet> m_insertionSets;
    Vector<Clobber> m_clobbers; // After we allocate this, we happily point pointers into it.
    Vector<Tmp> m_tmps;
    Deque<Tmp> m_active;
    RegisterSet m_activeRegs;
    BitVector m_usedSpillSlots;
    bool m_didSpill { false };
};

} // anonymous namespace

void allocateRegistersAndStackByLinearScan(Code& code)
{
    PhaseScope phaseScope(code, "allocateRegistersAndStackByLinearScan");
    if (verbose())
        dataLog("Air before linear scan:\n", code);
    LinearScan linearScan(code);
    linearScan.run();
    if (verbose())
        dataLog("Air after linear scan:\n", code);
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)