| 1 | /* |
| 2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "AirGenerate.h" |
| 28 | |
| 29 | #if ENABLE(B3_JIT) |
| 30 | |
| 31 | #include "AirAllocateRegistersAndStackAndGenerateCode.h" |
| 32 | #include "AirAllocateRegistersAndStackByLinearScan.h" |
| 33 | #include "AirAllocateRegistersByGraphColoring.h" |
| 34 | #include "AirAllocateStackByGraphColoring.h" |
| 35 | #include "AirCode.h" |
| 36 | #include "AirEliminateDeadCode.h" |
| 37 | #include "AirFixObviousSpills.h" |
| 38 | #include "AirFixPartialRegisterStalls.h" |
| 39 | #include "AirGenerationContext.h" |
| 40 | #include "AirHandleCalleeSaves.h" |
| 41 | #include "AirLiveness.h" |
| 42 | #include "AirLogRegisterPressure.h" |
| 43 | #include "AirLowerAfterRegAlloc.h" |
| 44 | #include "AirLowerEntrySwitch.h" |
| 45 | #include "AirLowerMacros.h" |
| 46 | #include "AirLowerStackArgs.h" |
| 47 | #include "AirOpcodeUtils.h" |
| 48 | #include "AirOptimizeBlockOrder.h" |
| 49 | #include "AirReportUsedRegisters.h" |
| 50 | #include "AirSimplifyCFG.h" |
| 51 | #include "AirStackAllocation.h" |
| 52 | #include "AirTmpMap.h" |
| 53 | #include "AirValidate.h" |
| 54 | #include "B3Common.h" |
| 55 | #include "B3Procedure.h" |
| 56 | #include "B3TimingScope.h" |
| 57 | #include "B3ValueInlines.h" |
| 58 | #include "CCallHelpers.h" |
| 59 | #include "DisallowMacroScratchRegisterUsage.h" |
| 60 | #include "LinkBuffer.h" |
| 61 | #include <wtf/IndexMap.h> |
| 62 | |
| 63 | namespace JSC { namespace B3 { namespace Air { |
| 64 | |
void prepareForGeneration(Code& code)
{
    // Runs the whole Air pipeline — lowering, register allocation, stack
    // allocation, and late CFG cleanup — leaving `code` ready for generate().
    // The phase order below is significant; several later phases state
    // explicit ordering requirements in their comments.
    TimingScope timingScope("Air::prepareForGeneration");

    // If we're doing super verbose dumping, the phase scope of any phase will already do a dump.
    if (shouldDumpIR(AirMode) && !shouldDumpIRAtEachPhase(AirMode)) {
        dataLog("Initial air:\n");
        dataLog(code);
    }

    // We don't expect the incoming code to have predecessors computed.
    code.resetReachability();

    if (shouldValidateIR())
        validate(code);

    // O0 path: skip all optimization phases and defer register/stack
    // allocation to the single combined allocate-and-generate pass, which is
    // created at the bottom of this branch and consumed by generate().
    if (!code.optLevel()) {
        lowerMacros(code);

        // FIXME: The name of this phase doesn't make much sense in O0 since we do this before
        // register allocation.
        lowerAfterRegAlloc(code);

        // Actually create entrypoints.
        lowerEntrySwitch(code);

        // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood that a high
        // frequency successor is also the fall-through target.
        optimizeBlockOrder(code);

        if (shouldValidateIR())
            validate(code);

        if (shouldDumpIR(AirMode)) {
            dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
            dataLog(code);
        }

        // generate() will later dispatch to this object instead of the
        // normal already-allocated-registers path.
        code.m_generateAndAllocateRegisters = std::make_unique<GenerateAndAllocateRegisters>(code);
        code.m_generateAndAllocateRegisters->prepareForGeneration();

        return;
    }

    simplifyCFG(code);

    lowerMacros(code);

    // This is where we run our optimizations and transformations.
    // FIXME: Add Air optimizations.
    // https://bugs.webkit.org/show_bug.cgi?id=150456

    eliminateDeadCode(code);

    if (code.optLevel() == 1) {
        // When we're compiling quickly, we do register and stack allocation in one linear scan
        // phase. It's fast because it computes liveness only once.
        allocateRegistersAndStackByLinearScan(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // We may still need to do post-allocation lowering. Doing it after both register and
        // stack allocation is less optimal, but it works fine.
        lowerAfterRegAlloc(code);
    } else {
        // NOTE: B3 -O2 generates code that runs 1.5x-2x faster than code generated by -O1.
        // Most of this performance benefit is from -O2's graph coloring register allocation
        // and stack allocation pipeline, which you see below.

        // Register allocation for all the Tmps that do not have a corresponding machine
        // register. After this phase, every Tmp has a reg.
        allocateRegistersByGraphColoring(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // This replaces uses of spill slots with registers or constants if possible. It
        // does this by minimizing the amount that we perturb the already-chosen register
        // allocation. It may extend the live ranges of registers though.
        fixObviousSpills(code);

        lowerAfterRegAlloc(code);

        // This does first-fit allocation of stack slots using an interference graph plus a
        // bunch of other optimizations.
        allocateStackByGraphColoring(code);
    }

    // This turns all Stack and CallArg Args into Addr args that use the frame pointer.
    lowerStackArgs(code);

    // If we coalesced moves then we can unbreak critical edges. This is the main reason for this
    // phase.
    simplifyCFG(code);

    // This is needed to satisfy a requirement of B3::StackmapValue. This also removes dead
    // code. We can avoid running this when certain optimizations are disabled.
    if (code.optLevel() >= 2 || code.needsUsedRegisters())
        reportUsedRegisters(code);

    // Attempt to remove false dependencies between instructions created by partial register changes.
    // This must be executed as late as possible as it depends on the instructions order and register
    // use. We _must_ run this after reportUsedRegisters(), since that kills variable assignments
    // that seem dead. Luckily, this phase does not change register liveness, so that's OK.
    fixPartialRegisterStalls(code);

    // Actually create entrypoints.
    lowerEntrySwitch(code);

    // The control flow graph can be simplified further after we have lowered EntrySwitch.
    simplifyCFG(code);

    // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood that a high
    // frequency successor is also the fall-through target.
    optimizeBlockOrder(code);

    if (shouldValidateIR())
        validate(code);

    // Do a final dump of Air. Note that we have to do this even if we are doing per-phase dumping,
    // since the final generation is not a phase.
    if (shouldDumpIR(AirMode)) {
        dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
        dataLog(code);
    }
}
| 196 | |
// Emits machine code for Air code whose registers and stack slots were fully
// allocated by prepareForGeneration() (i.e. any optLevel >= 1 pipeline).
// Walks the blocks in their final order, generating each Inst, patching
// forward branches once their target label is known, and emitting the
// epilogue inline at each return.
static void generateWithAlreadyAllocatedRegisters(Code& code, CCallHelpers& jit)
{
    TimingScope timingScope("Air::generate");

    // Generated Air is expected to never need the macro-assembler scratch
    // register; this guards against an Inst's generator silently using it.
    DisallowMacroScratchRegisterUsage disallowScratch(jit);

    // And now, we generate code.
    GenerationContext context;
    context.code = &code;
    context.blockLabels.resize(code.size());
    for (BasicBlock* block : code)
        context.blockLabels[block] = Box<CCallHelpers::Label>::create();
    // Jumps to blocks we have not emitted yet; linked when the target's label
    // is bound at the top of the block loop below.
    IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(code.size());

    // Link `jump` to `target` immediately if the target was already emitted
    // (backward branch); otherwise queue it for later linking.
    auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
        if (context.blockLabels[target]->isSet()) {
            jump.linkTo(*context.blockLabels[target], &jit);
            return;
        }

        blockJumps[target].append(jump);
    };

    // Records a PC -> B3::Origin mapping for each emitted Inst, falling back
    // to an empty Origin when the Inst has no origin value.
    PCToOriginMap& pcToOriginMap = code.proc().pcToOriginMap();
    auto addItem = [&] (Inst& inst) {
        if (!inst.origin) {
            pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
            return;
        }
        pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), inst.origin->origin());
    };

    // May be null; all disassembler hooks below are conditional on it.
    Disassembler* disassembler = code.disassembler();

    for (BasicBlock* block : code) {
        context.currentBlock = block;
        // UINT_MAX is the "not inside an Inst" sentinel for indexInBlock.
        context.indexInBlock = UINT_MAX;
        // Bind this block's label and resolve any forward jumps queued for it.
        blockJumps[block].link(&jit);
        CCallHelpers::Label label = jit.label();
        *context.blockLabels[block] = label;

        if (disassembler)
            disassembler->startBlock(block, jit);

        // Entrypoint blocks get their prologue emitted here, via the
        // per-entrypoint generator installed on Code.
        if (Optional<unsigned> entrypointIndex = code.entrypointIndex(block)) {
            ASSERT(code.isEntrypoint(block));

            if (disassembler)
                disassembler->startEntrypoint(jit);

            code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(jit, code);

            if (disassembler)
                disassembler->endEntrypoint(jit);
        } else
            ASSERT(!code.isEntrypoint(block));

        // Generate every Inst except the terminator. Non-terminator Insts
        // must not produce a branch to link.
        ASSERT(block->size() >= 1);
        for (unsigned i = 0; i < block->size() - 1; ++i) {
            context.indexInBlock = i;
            Inst& inst = block->at(i);
            addItem(inst);
            auto start = jit.labelIgnoringWatchpoints();
            CCallHelpers::Jump jump = inst.generate(jit, context);
            ASSERT_UNUSED(jump, !jump.isSet());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&inst, start, end);
        }

        context.indexInBlock = block->size() - 1;

        // Elide an unconditional Jump whose target is the fall-through block.
        if (block->last().kind.opcode == Jump
            && block->successorBlock(0) == code.findNextBlock(block))
            continue;

        addItem(block->last());

        if (isReturn(block->last().kind.opcode)) {
            // We currently don't represent the full prologue/epilogue in Air, so we need to
            // have this override.
            auto start = jit.labelIgnoringWatchpoints();
            if (code.frameSize()) {
                jit.emitRestore(code.calleeSaveRegisterAtOffsetList());
                jit.emitFunctionEpilogue();
            } else
                jit.emitFunctionEpilogueWithEmptyFrame();
            jit.ret();
            // Second addItem brackets the emitted epilogue range in the
            // PC-to-origin map (the first was appended just above).
            addItem(block->last());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&block->last(), start, end);
            continue;
        }

        auto start = jit.labelIgnoringWatchpoints();
        CCallHelpers::Jump jump = block->last().generate(jit, context);
        auto end = jit.labelIgnoringWatchpoints();
        if (disassembler)
            disassembler->addInst(&block->last(), start, end);

        // The jump won't be set for patchpoints. It won't be set for Oops because then it won't have
        // any successors.
        if (jump.isSet()) {
            switch (block->numSuccessors()) {
            case 1:
                link(jump, block->successorBlock(0));
                break;
            case 2:
                // Taken edge first; only emit an extra unconditional jump for
                // the not-taken edge when it isn't the fall-through block.
                link(jump, block->successorBlock(0));
                if (block->successorBlock(1) != code.findNextBlock(block))
                    link(jit.jump(), block->successorBlock(1));
                break;
            default:
                RELEASE_ASSERT_NOT_REACHED();
                break;
            }
        }
        addItem(block->last());
    }

    context.currentBlock = nullptr;
    context.indexInBlock = UINT_MAX;

    // Publish the final label of each entrypoint block so callers of the
    // generated code can find the entry addresses.
    Vector<CCallHelpers::Label> entrypointLabels(code.numEntrypoints());
    for (unsigned i = code.numEntrypoints(); i--;)
        entrypointLabels[i] = *context.blockLabels[code.entrypoint(i).block()];
    code.setEntrypointLabels(WTFMove(entrypointLabels));

    pcToOriginMap.appendItem(jit.label(), Origin());
    // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689
    if (disassembler)
        disassembler->startLatePath(jit);

    // Late paths are out-of-line code (e.g. slow paths) registered by Insts
    // during generation; emit them all after the main body.
    for (auto& latePath : context.latePaths)
        latePath->run(jit, context);

    if (disassembler)
        disassembler->endLatePath(jit);
    pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
}
| 338 | |
| 339 | void generate(Code& code, CCallHelpers& jit) |
| 340 | { |
| 341 | if (code.optLevel()) |
| 342 | generateWithAlreadyAllocatedRegisters(code, jit); |
| 343 | else |
| 344 | code.m_generateAndAllocateRegisters->generate(jit); |
| 345 | } |
| 346 | |
| 347 | } } } // namespace JSC::B3::Air |
| 348 | |
| 349 | #endif // ENABLE(B3_JIT) |
| 350 | |