/*
 * Copyright (C) 2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirAllocateRegistersAndStackAndGenerateCode.h"

#if ENABLE(B3_JIT)

#include "AirArgInlines.h"
#include "AirBlockInsertionSet.h"
#include "AirCode.h"
#include "AirHandleCalleeSaves.h"
#include "AirLowerStackArgs.h"
#include "AirStackAllocation.h"
#include "AirTmpMap.h"
#include "CCallHelpers.h"
#include "DisallowMacroScratchRegisterUsage.h"

namespace JSC { namespace B3 { namespace Air {

GenerateAndAllocateRegisters::GenerateAndAllocateRegisters(Code& code)
    : m_code(code)
    , m_map(code)
{ }

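// Records, for each non-register Tmp, the last global instruction index at which that Tmp is
// observed live. freeDeadTmpsIfNeeded() compares against this to hand back registers whose
// Tmp's live range has already ended.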
void GenerateAndAllocateRegisters::buildLiveRanges(UnifiedTmpLiveness& liveness)
{
    m_liveRangeEnd = TmpMap<size_t>(m_code, 0);

    m_globalInstIndex = 0;
    for (BasicBlock* block : m_code) {
        for (Tmp tmp : liveness.liveAtHead(block)) {
            if (!tmp.isReg())
                m_liveRangeEnd[tmp] = m_globalInstIndex;
        }
        for (Inst& inst : *block) {
            inst.forEachTmpFast([&] (Tmp tmp) {
                if (!tmp.isReg())
                    m_liveRangeEnd[tmp] = m_globalInstIndex;
            });
            ++m_globalInstIndex;
        }
        for (Tmp tmp : liveness.liveAtTail(block)) {
            if (!tmp.isReg())
                m_liveRangeEnd[tmp] = m_globalInstIndex;
        }
    }
}

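// A terminal patchpoint can def Tmps, but since a terminal must be the last instruction in its
// block, there is nowhere in that block to flush those defs to their spill slots. So, for each
// successor edge of such a block, we insert a block whose only job is to do that flush. The flush
// code itself is emitted at the end of generate(), via m_blocksAfterTerminalPatchForSpilling.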
void GenerateAndAllocateRegisters::insertBlocksForFlushAfterTerminalPatchpoints()
{
    BlockInsertionSet blockInsertionSet(m_code);
    for (BasicBlock* block : m_code) {
        Inst& inst = block->last();
        if (inst.kind.opcode != Patch)
            continue;

        HashMap<Tmp, Arg*> needToDef;

        inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) {
            if (!arg.isTmp())
                return;
            Tmp tmp = arg.tmp();
            if (Arg::isAnyDef(role) && !tmp.isReg())
                needToDef.add(tmp, &arg);
        });

        if (needToDef.isEmpty())
            continue;

        for (FrequentedBlock& frequentedSuccessor : block->successors()) {
            BasicBlock* successor = frequentedSuccessor.block();
            BasicBlock* newBlock = blockInsertionSet.insertBefore(successor, successor->frequency());
            newBlock->appendInst(Inst(Jump, inst.origin));
            newBlock->setSuccessors(successor);
            newBlock->addPredecessor(block);
            frequentedSuccessor.block() = newBlock;
            successor->replacePredecessor(block, newBlock);

            m_blocksAfterTerminalPatchForSpilling.add(newBlock, PatchSpillData { CCallHelpers::Jump(), CCallHelpers::Label(), needToDef });
        }
    }

    blockInsertionSet.execute();
}

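// Computes an Address for an offset from the frame pointer (callFrameRegister). On x86, any frame
// offset is assumed to be directly encodable (see the assertion). On other targets, an offset too
// large to encode directly is materialized into the pinned extended-offset-addr register.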
static ALWAYS_INLINE CCallHelpers::Address callFrameAddr(CCallHelpers& jit, intptr_t offsetFromFP)
{
    if (isX86()) {
        ASSERT(Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP).isValidForm(Width64));
        return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP);
    }

    ASSERT(pinnedExtendedOffsetAddrRegister());
    auto addr = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP);
    if (addr.isValidForm(Width64))
        return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP);
    GPRReg reg = *pinnedExtendedOffsetAddrRegister();
    jit.move(CCallHelpers::TrustedImmPtr(offsetFromFP), reg);
    jit.add64(GPRInfo::callFrameRegister, reg);
    return CCallHelpers::Address(reg);
}

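// Stores the value currently held in reg into tmp's spill slot. The allocation state is left
// untouched.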
ALWAYS_INLINE void GenerateAndAllocateRegisters::flush(Tmp tmp, Reg reg)
{
    ASSERT(tmp);
    intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
    if (tmp.isGP())
        m_jit->store64(reg.gpr(), callFrameAddr(*m_jit, offset));
    else
        m_jit->storeDouble(reg.fpr(), callFrameAddr(*m_jit, offset));
}

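// Flushes tmp to its spill slot and releases reg back to the available set, so the Tmp's only
// location is once again the stack.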
ALWAYS_INLINE void GenerateAndAllocateRegisters::spill(Tmp tmp, Reg reg)
{
    ASSERT(reg);
    ASSERT(m_map[tmp].reg == reg);
    m_availableRegs[tmp.bank()].set(reg);
    m_currentAllocation->at(reg) = Tmp();
    flush(tmp, reg);
    m_map[tmp].reg = Reg();
}

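// Binds tmp to reg, spilling whatever currently occupies reg. Unless this is a def, the Tmp's
// value is reloaded from its spill slot, since the stack is every Tmp's canonical location.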
ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool isDef)
{
    if (Tmp occupyingTmp = m_currentAllocation->at(reg))
        spill(occupyingTmp, reg);
    else {
        ASSERT(!m_currentAllocation->at(reg));
        ASSERT(m_availableRegs[tmp.bank()].get(reg));
    }

    m_map[tmp].reg = reg;
    m_availableRegs[tmp.bank()].clear(reg);
    m_currentAllocation->at(reg) = tmp;

    if (!isDef) {
        intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
        if (tmp.bank() == GP)
            m_jit->load64(callFrameAddr(*m_jit, offset), reg.gpr());
        else
            m_jit->loadDouble(callFrameAddr(*m_jit, offset), reg.fpr());
    }
}

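// Walks the register file (at most once per instruction) and releases any register bound to a
// Tmp whose live range ended before the current instruction, so we prefer reusing dead registers
// over spilling live ones.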
ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded()
{
    if (m_didAlreadyFreeDeadSlots)
        return;

    m_didAlreadyFreeDeadSlots = true;
    for (size_t i = 0; i < m_currentAllocation->size(); ++i) {
        Tmp tmp = m_currentAllocation->at(i);
        if (!tmp)
            continue;
        if (tmp.isReg())
            continue;
        if (m_liveRangeEnd[tmp] >= m_globalInstIndex)
            continue;

        Reg reg = Reg::fromIndex(i);
        m_map[tmp].reg = Reg();
        m_availableRegs[tmp.bank()].set(reg);
        m_currentAllocation->at(i) = Tmp();
    }
}

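// Rewrites tmp (a non-register Tmp) in place into a register Tmp, loading it from its spill slot
// when it is a use. Returns false when every register in the bank is already pinned by this
// instruction's named register operands, in which case the caller falls back to stack forms.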
ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, bool isDef)
{
    ASSERT(!tmp.isReg());
    if (Reg reg = m_map[tmp].reg) {
        ASSERT(!m_namedDefdRegs.contains(reg));
        tmp = Tmp(reg);
        m_namedUsedRegs.set(reg);
        ASSERT(!m_availableRegs[bank].get(reg));
        return true;
    }

    if (!m_availableRegs[bank].numberOfSetRegisters())
        freeDeadTmpsIfNeeded();

    if (m_availableRegs[bank].numberOfSetRegisters()) {
        // We first take an available register.
        for (Reg reg : m_registers[bank]) {
            if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
                continue;
            if (!m_availableRegs[bank].contains(reg))
                continue;
            // At this point, it doesn't matter if we add it to m_namedUsedRegs or m_namedDefdRegs.
            // We just need to mark that we can't use it again.
            m_namedUsedRegs.set(reg);
            alloc(tmp, reg, isDef);
            tmp = Tmp(reg);
            return true;
        }

        RELEASE_ASSERT_NOT_REACHED();
    }

    // Nothing was available, let's make some room.
    for (Reg reg : m_registers[bank]) {
        if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
            continue;

        m_namedUsedRegs.set(reg);

        alloc(tmp, reg, isDef);
        tmp = Tmp(reg);
        return true;
    }

    // This can happen if we have more WarmAnys than available registers.
    return false;
}

ALWAYS_INLINE bool GenerateAndAllocateRegisters::isDisallowedRegister(Reg reg)
{
    return !m_allowedRegisters.get(reg);
}

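// Sets up everything generate() relies on: callee saves are handled pessimistically, every Tmp
// (and every allocatable register) gets its own spill slot, stack slot offsets are assigned, and
// stack args are lowered. After this runs, the frame layout is final.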
void GenerateAndAllocateRegisters::prepareForGeneration()
{
    // We pessimistically assume we use all callee saves.
    handleCalleeSaves(m_code, RegisterSet::calleeSaveRegisters());
    allocateEscapedStackSlots(m_code);

    // Each Tmp gets its own stack slot.
    auto createStackSlot = [&] (const Tmp& tmp) {
        TmpData data;
        data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
        data.reg = Reg();
        m_map[tmp] = data;
#if !ASSERT_DISABLED
        m_allTmps[tmp.bank()].append(tmp);
#endif
    };

    m_code.forEachTmp([&] (Tmp tmp) {
        ASSERT(!tmp.isReg());
        createStackSlot(tmp);
    });

    m_allowedRegisters = RegisterSet();

    forEachBank([&] (Bank bank) {
        m_registers[bank] = m_code.regsInPriorityOrder(bank);

        for (Reg reg : m_registers[bank]) {
            m_allowedRegisters.set(reg);
            createStackSlot(Tmp(reg));
        }
    });

    {
        unsigned nextIndex = 0;
        for (StackSlot* slot : m_code.stackSlots()) {
            if (slot->isLocked())
                continue;
            intptr_t offset = -static_cast<intptr_t>(m_code.frameSize()) - static_cast<intptr_t>(nextIndex) * 8 - 8;
            ++nextIndex;
            slot->setOffsetFromFP(offset);
        }
    }

    updateFrameSizeBasedOnStackSlots(m_code);
    m_code.setStackIsAllocated(true);

    lowerStackArgs(m_code);

    // Verify none of these passes add any tmps.
#if !ASSERT_DISABLED
    forEachBank([&] (Bank bank) {
        ASSERT(m_allTmps[bank].size() - m_registers[bank].size() == m_code.numTmps(bank));
    });
#endif
}

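// Generates code in a single pass over the Insts, allocating registers greedily as it goes. Every
// Tmp's canonical home is its spill slot; registers only cache values within a block, except when
// a successor with a single predecessor inherits our register state at the block boundary.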
void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
{
    m_jit = &jit;

    TimingScope timingScope("Air::generateAndAllocateRegisters");

    insertBlocksForFlushAfterTerminalPatchpoints();

    DisallowMacroScratchRegisterUsage disallowScratch(*m_jit);

    UnifiedTmpLiveness liveness(m_code);
    buildLiveRanges(liveness);

    IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
    {
        IndexMap<Reg, Tmp> defaultCurrentAllocation(Reg::maxIndex() + 1);
        for (BasicBlock* block : m_code)
            currentAllocationMap[block] = defaultCurrentAllocation;

        // The only things live that are in registers at the root blocks are
        // the explicitly named registers that are live.

        for (unsigned i = m_code.numEntrypoints(); i--;) {
            BasicBlock* entrypoint = m_code.entrypoint(i).block();
            for (Tmp tmp : liveness.liveAtHead(entrypoint)) {
                if (tmp.isReg())
                    currentAllocationMap[entrypoint][tmp.reg()] = tmp;
            }
        }
    }

    // And now, we generate code.
    GenerationContext context;
    context.code = &m_code;
    context.blockLabels.resize(m_code.size());
    for (BasicBlock* block : m_code)
        context.blockLabels[block] = Box<CCallHelpers::Label>::create();
    IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(m_code.size());

    auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
        if (context.blockLabels[target]->isSet()) {
            jump.linkTo(*context.blockLabels[target], m_jit);
            return;
        }

        blockJumps[target].append(jump);
    };

    Disassembler* disassembler = m_code.disassembler();

    m_globalInstIndex = 0;

    for (BasicBlock* block : m_code) {
        context.currentBlock = block;
        context.indexInBlock = UINT_MAX;
        blockJumps[block].link(m_jit);
        CCallHelpers::Label label = m_jit->label();
        *context.blockLabels[block] = label;

        if (disassembler)
            disassembler->startBlock(block, *m_jit);

        if (Optional<unsigned> entrypointIndex = m_code.entrypointIndex(block)) {
            ASSERT(m_code.isEntrypoint(block));
            if (disassembler)
                disassembler->startEntrypoint(*m_jit);

            m_code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(*m_jit, m_code);

            if (disassembler)
                disassembler->endEntrypoint(*m_jit);
        } else
            ASSERT(!m_code.isEntrypoint(block));

        auto startLabel = m_jit->labelIgnoringWatchpoints();

        {
            auto iter = m_blocksAfterTerminalPatchForSpilling.find(block);
            if (iter != m_blocksAfterTerminalPatchForSpilling.end()) {
                auto& data = iter->value;
                data.jump = m_jit->jump();
                data.continueLabel = m_jit->label();
            }
        }

        forEachBank([&] (Bank bank) {
#if !ASSERT_DISABLED
            // By default, everything is spilled at block boundaries. We do this after we process each block
            // so we don't have to walk all Tmps, since #Tmps >> #Available regs. Instead, we walk the register file at
            // each block boundary and clear entries in this map.
            for (Tmp tmp : m_allTmps[bank])
                ASSERT(m_map[tmp].reg == Reg());
#endif

            RegisterSet availableRegisters;
            for (Reg reg : m_registers[bank])
                availableRegisters.set(reg);
            m_availableRegs[bank] = WTFMove(availableRegisters);
        });

        IndexMap<Reg, Tmp>& currentAllocation = currentAllocationMap[block];
        m_currentAllocation = &currentAllocation;

        for (unsigned i = 0; i < currentAllocation.size(); ++i) {
            Tmp tmp = currentAllocation[i];
            if (!tmp)
                continue;
            Reg reg = Reg::fromIndex(i);
            m_map[tmp].reg = reg;
            m_availableRegs[tmp.bank()].clear(reg);
        }

        bool isReplayingSameInst = false;
        for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) {
            if (instIndex && !isReplayingSameInst)
                startLabel = m_jit->labelIgnoringWatchpoints();

            context.indexInBlock = instIndex;

            Inst& inst = block->at(instIndex);

            m_didAlreadyFreeDeadSlots = false;

            m_namedUsedRegs = RegisterSet();
            m_namedDefdRegs = RegisterSet();

            inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) {
                if (!arg.isTmp())
                    return;

                Tmp tmp = arg.tmp();
                if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                    return;

                if (tmp.isReg()) {
                    if (Arg::isAnyUse(role))
                        m_namedUsedRegs.set(tmp.reg());
                    if (Arg::isAnyDef(role))
                        m_namedDefdRegs.set(tmp.reg());
                }

                // We convert any cold uses that are already in the stack to just point to
                // the canonical stack location.
                if (!Arg::isColdUse(role))
                    return;

                if (!inst.admitsStack(arg))
                    return;

                auto& entry = m_map[tmp];
                if (!entry.reg) {
                    // We're a cold use, and our current location is already on the stack. Just use that.
                    arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP());
                }
            });

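            // Registers clobbered by this Patch (or early-clobbered by the next one) are reserved
            // like named defs, so whatever currently occupies them gets spilled before the
            // patchpoint executes.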
            RegisterSet clobberedRegisters;
            {
                Inst* nextInst = block->get(instIndex + 1);
                if (inst.kind.opcode == Patch || (nextInst && nextInst->kind.opcode == Patch)) {
                    if (inst.kind.opcode == Patch)
                        clobberedRegisters.merge(inst.extraClobberedRegs());
                    if (nextInst && nextInst->kind.opcode == Patch)
                        clobberedRegisters.merge(nextInst->extraEarlyClobberedRegs());

                    clobberedRegisters.filter(m_allowedRegisters);
                    clobberedRegisters.exclude(m_namedDefdRegs);

                    m_namedDefdRegs.merge(clobberedRegisters);
                }
            }

            auto allocNamed = [&] (const RegisterSet& named, bool isDef) {
                for (Reg reg : named) {
                    if (Tmp occupyingTmp = currentAllocation[reg]) {
                        if (occupyingTmp == Tmp(reg))
                            continue;
                    }

                    freeDeadTmpsIfNeeded(); // We don't want to spill a dead tmp.
                    alloc(Tmp(reg), reg, isDef);
                }
            };

            allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register.
            allocNamed(m_namedDefdRegs, true);

            {
                auto tryAllocate = [&] {
                    Vector<Tmp*, 8> usesToAlloc;
                    Vector<Tmp*, 8> defsToAlloc;

                    inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                        if (tmp.isReg())
                            return;

                        // We treat Use+Def as a use.
                        if (Arg::isAnyUse(role))
                            usesToAlloc.append(&tmp);
                        else if (Arg::isAnyDef(role))
                            defsToAlloc.append(&tmp);
                    });

                    auto tryAllocateTmps = [&] (auto& vector, bool isDef) {
                        bool success = true;
                        for (Tmp* tmp : vector)
                            success &= assignTmp(*tmp, tmp->bank(), isDef);
                        return success;
                    };

                    // We first handle uses, then defs. We want to be able to tell the register allocator
                    // which tmps need to be loaded from memory into their assigned register. Those such
                    // tmps are uses. Defs don't need to be reloaded since we're defining them. However,
                    // some tmps may both be used and defd. So we handle uses first since forEachTmp could
                    // walk uses/defs in any order.
                    bool success = true;
                    success &= tryAllocateTmps(usesToAlloc, false);
                    success &= tryAllocateTmps(defsToAlloc, true);
                    return success;
                };

                // We first allocate trying to give any Tmp a register. If that makes us exhaust the
                // available registers, we convert anything that accepts stack to be a stack addr
                // instead. This can happen for Insts that take in many args, but most of those
                // args can just be stack values.
                bool success = tryAllocate();
                if (!success) {
                    RELEASE_ASSERT(!isReplayingSameInst); // We should only need to do the below at most once per inst.

                    // We need to capture the register state before we start spilling things
                    // since we may have multiple arguments that are the same register.
                    IndexMap<Reg, Tmp> allocationSnapshot = currentAllocation;

                    // We rewind this Inst to be in its previous state, however, if any arg admits stack,
                    // we move to providing that arg in stack form. This will allow us to fully allocate
                    // this inst when we rewind.
                    inst.forEachArg([&] (Arg& arg, Arg::Role, Bank, Width) {
                        if (!arg.isTmp())
                            return;

                        Tmp tmp = arg.tmp();
                        if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                            return;

                        if (tmp.isReg()) {
                            Tmp originalTmp = allocationSnapshot[tmp.reg()];
                            if (originalTmp.isReg()) {
                                ASSERT(tmp.reg() == originalTmp.reg());
                                // This means this Inst referred to this reg directly. We leave these as is.
                                return;
                            }
                            tmp = originalTmp;
                        }

                        if (!inst.admitsStack(arg)) {
                            arg = tmp;
                            return;
                        }

                        auto& entry = m_map[tmp];
                        if (Reg reg = entry.reg)
                            spill(tmp, reg);

                        arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP());
                    });

                    --instIndex;
                    isReplayingSameInst = true;
                    continue;
                }

                isReplayingSameInst = false;
            }

            if (m_code.needsUsedRegisters() && inst.kind.opcode == Patch) {
                freeDeadTmpsIfNeeded();
                RegisterSet registerSet;
                for (size_t i = 0; i < currentAllocation.size(); ++i) {
                    if (currentAllocation[i])
                        registerSet.set(Reg::fromIndex(i));
                }
                inst.reportUsedRegisters(registerSet);
            }

            if (inst.isTerminal() && block->numSuccessors()) {
                // By default, we spill everything between block boundaries. However, we have a small
                // heuristic to pass along register state. We should eventually make this better.
                // What we do now is if we have a successor with a single predecessor (us), and we
                // haven't yet generated code for it, we give it our register state. If all our successors
                // can take on our register state, we don't flush at the end of this block.

                bool everySuccessorGetsOurRegisterState = true;
                for (unsigned i = 0; i < block->numSuccessors(); ++i) {
                    BasicBlock* successor = block->successorBlock(i);
                    if (successor->numPredecessors() == 1 && !context.blockLabels[successor]->isSet())
                        currentAllocationMap[successor] = currentAllocation;
                    else
                        everySuccessorGetsOurRegisterState = false;
                }
                if (!everySuccessorGetsOurRegisterState) {
                    for (Tmp tmp : liveness.liveAtTail(block)) {
                        if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                            continue;
                        if (Reg reg = m_map[tmp].reg)
                            flush(tmp, reg);
                    }
                }
            }

            if (!inst.isTerminal()) {
                CCallHelpers::Jump jump = inst.generate(*m_jit, context);
                ASSERT_UNUSED(jump, !jump.isSet());

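                // We reserved these registers for the patchpoint clobbers. Now that the instruction
                // has been generated, return them to the available pool and drop their bindings.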
                for (Reg reg : clobberedRegisters) {
                    Tmp tmp(reg);
                    ASSERT(currentAllocation[reg] == tmp);
                    m_availableRegs[tmp.bank()].set(reg);
                    m_currentAllocation->at(reg) = Tmp();
                    m_map[tmp].reg = Reg();
                }
            } else {
                bool needsToGenerate = true;
                if (inst.kind.opcode == Jump && block->successorBlock(0) == m_code.findNextBlock(block))
                    needsToGenerate = false;

                if (isReturn(inst.kind.opcode)) {
                    needsToGenerate = false;

                    // We currently don't represent the full epilogue in Air, so we need to
                    // have this override.
                    if (m_code.frameSize()) {
                        m_jit->emitRestore(m_code.calleeSaveRegisterAtOffsetList());
                        m_jit->emitFunctionEpilogue();
                    } else
                        m_jit->emitFunctionEpilogueWithEmptyFrame();
                    m_jit->ret();
                }

                if (needsToGenerate) {
                    CCallHelpers::Jump jump = inst.generate(*m_jit, context);

                    // The jump won't be set for patchpoints. It won't be set for Oops because then it won't have
                    // any successors.
                    if (jump.isSet()) {
                        switch (block->numSuccessors()) {
                        case 1:
                            link(jump, block->successorBlock(0));
                            break;
                        case 2:
                            link(jump, block->successorBlock(0));
                            if (block->successorBlock(1) != m_code.findNextBlock(block))
                                link(m_jit->jump(), block->successorBlock(1));
                            break;
                        default:
                            RELEASE_ASSERT_NOT_REACHED();
                            break;
                        }
                    }
                }
            }

            auto endLabel = m_jit->labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&inst, startLabel, endLabel);

            ++m_globalInstIndex;
        }

        // Registers usually get spilled at block boundaries. We do it this way since we don't
        // want to iterate the entire TmpMap, since usually #Tmps >> #Regs. We may not actually spill
        // all registers, but at the top of this loop we handle that case by pre-populating register
        // state. Here, we just clear this map. After this loop, this map should contain only
        // null entries.
        for (size_t i = 0; i < currentAllocation.size(); ++i) {
            if (Tmp tmp = currentAllocation[i])
                m_map[tmp].reg = Reg();
        }
    }

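    // Emit the out-of-line spill code for the blocks inserted after terminal patchpoints: each
    // such block jumps here, stores the registers the preceding patchpoint defd into the
    // corresponding Tmps' spill slots, and then jumps back to its continue label.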
    for (auto& entry : m_blocksAfterTerminalPatchForSpilling) {
        entry.value.jump.linkTo(m_jit->label(), m_jit);
        const HashMap<Tmp, Arg*>& spills = entry.value.defdTmps;
        for (auto& entry : spills) {
            Arg* arg = entry.value;
            if (!arg->isTmp())
                continue;
            Tmp originalTmp = entry.key;
            Tmp currentTmp = arg->tmp();
            ASSERT_WITH_MESSAGE(currentTmp.isReg(), "We already did register allocation so we should have assigned this Tmp to a register.");
            flush(originalTmp, currentTmp.reg());
        }
        m_jit->jump().linkTo(entry.value.continueLabel, m_jit);
    }

    context.currentBlock = nullptr;
    context.indexInBlock = UINT_MAX;

    Vector<CCallHelpers::Label> entrypointLabels(m_code.numEntrypoints());
    for (unsigned i = m_code.numEntrypoints(); i--;)
        entrypointLabels[i] = *context.blockLabels[m_code.entrypoint(i).block()];
    m_code.setEntrypointLabels(WTFMove(entrypointLabels));

    if (disassembler)
        disassembler->startLatePath(*m_jit);

    // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689
    for (auto& latePath : context.latePaths)
        latePath->run(*m_jit, context);

    if (disassembler)
        disassembler->endLatePath(*m_jit);
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)