/*
 * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirEmitShuffle.h"

#if ENABLE(B3_JIT)

#include "AirCode.h"
#include "AirInstInlines.h"
#include <wtf/GraphNodeWorklist.h>
#include <wtf/ListDump.h>

namespace JSC { namespace B3 { namespace Air {

namespace {

namespace AirEmitShuffleInternal {
static const bool verbose = false;
}

template<typename Functor>
Tmp findPossibleScratch(Code& code, Bank bank, const Functor& functor)
{
    for (Reg reg : code.regsInPriorityOrder(bank)) {
        Tmp tmp(reg);
        if (functor(tmp))
            return tmp;
    }
    return Tmp();
}

Tmp findPossibleScratch(Code& code, Bank bank, const Arg& arg1, const Arg& arg2)
{
    return findPossibleScratch(
        code, bank,
        [&] (Tmp tmp) -> bool {
            return !arg1.usesTmp(tmp) && !arg2.usesTmp(tmp);
        });
}

// Example: (a => b, b => a, a => c, b => d)
struct Rotate {
    Vector<ShufflePair> loop; // in the example, this is the loop: (a => b, b => a)
    Vector<ShufflePair> fringe; // in the example, these are the associated shifts: (a => c, b => d)
};

} // anonymous namespace

Bank ShufflePair::bank() const
{
    if (src().isMemory() && dst().isMemory() && width() > pointerWidth()) {
        // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves.
        return FP;
    }

    if (src().isGP() && dst().isGP()) {
        // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we
        // can do that more efficiently using GPRs, except in the special case above.
        return GP;
    }

    return FP;
}
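
// An illustrative classification (operand shapes assumed for the example; not from the original
// comments): on a 32-bit target, a Width64 Addr => Addr pair returns FP, so the shuffle can use a
// single MoveDouble instead of two GP load/store pairs, while a pair between GP tmps, or between a
// GP tmp and memory, returns GP.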

Vector<Inst, 2> ShufflePair::insts(Code& code, Value* origin) const
{
    if (UNLIKELY(src().isMemory() && dst().isMemory()))
        return { Inst(moveFor(bank(), width()), origin, src(), dst(), code.newTmp(bank())) };

    if (isValidForm(moveFor(bank(), width()), src().kind(), dst().kind()))
        return { Inst(moveFor(bank(), width()), origin, src(), dst()) };

    // We must be a store immediate or a move immediate if we reach here. The reason:
    // 1. We're not a mem->mem move, given the above check.
    // 2. It's always valid to do a load from Addr into a tmp using Move/Move32/MoveFloat/MoveDouble.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Addr, Arg::Tmp));
    // 3. It's also always valid to do a Tmp->Tmp move.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, Arg::Tmp));
    // 4. It's always valid to do a Tmp->Addr store.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, Arg::Addr));

    ASSERT(src().isSomeImm());
    Tmp tmp = code.newTmp(bank());
    ASSERT(isValidForm(Move, Arg::BigImm, Arg::Tmp));
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, dst().kind()));
    return {
        Inst(Move, origin, Arg::bigImm(src().value()), tmp),
        Inst(moveFor(bank(), width()), origin, tmp, dst()),
    };
}
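
// A sketch of the two-instruction fallback above, for an immediate that can only reach memory via
// a register (operand names hypothetical): the immediate is materialized into a fresh tmp and then
// stored, e.g.
//
//     Move $imm, %tmp0
//     Move %tmp0, (addr)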

void ShufflePair::dump(PrintStream& out) const
{
    out.print(width(), ":", src(), "=>", dst());
}

Inst createShuffle(Value* origin, const Vector<ShufflePair>& pairs)
{
    Inst result(Shuffle, origin);
    for (const ShufflePair& pair : pairs)
        result.append(pair.src(), pair.dst(), Arg::widthArg(pair.width()));
    return result;
}
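
// Usage sketch (hypothetical pairs; not from the original comments): each pair contributes a
// (src, dst, width) triple to the variable-length Shuffle inst, so two pairs yield six args:
//
//     Shuffle src0, dst0, width0, src1, dst1, width1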

Vector<Inst> emitShuffle(
    Code& code, Vector<ShufflePair> pairs, std::array<Arg, 2> scratches, Bank bank,
    Value* origin)
{
    if (AirEmitShuffleInternal::verbose) {
        dataLog(
            "Dealing with pairs: ", listDump(pairs), " and scratches ", scratches[0], ", ",
            scratches[1], "\n");
    }

    pairs.removeAllMatching(
        [&] (const ShufflePair& pair) -> bool {
            return pair.src() == pair.dst();
        });

    // First validate that this is the kind of shuffle that we know how to deal with.
#if !ASSERT_DISABLED
    for (const ShufflePair& pair : pairs) {
        ASSERT(pair.src().isBank(bank));
        ASSERT(pair.dst().isBank(bank));
        ASSERT(pair.dst().isTmp() || pair.dst().isMemory());
    }
#endif // !ASSERT_DISABLED

    // There are two possible kinds of operations that we will do:
    //
    // - Shift. Example: (a => b, b => c). We emit this as "Move b, c; Move a, b". This only requires
    //   scratch registers if there are memory->memory moves. We want to find as many of these as
    //   possible because they are cheaper. Note that shifts can involve the same source mentioned
    //   multiple times. Example: (a => b, a => c, b => d, b => e).
    //
    // - Rotate. Example: (a => b, b => a). We want to emit this as "Swap a, b", but that instruction
    //   may not be available, in which case we may need a scratch register or a scratch memory
    //   location. A gnarlier example is (a => b, b => c, c => a). We can emit this as "Swap b, c;
    //   Swap a, b". Note that swapping has to be careful about differing widths.
    //
    // Note that a rotate can have "fringe". For example, we might have (a => b, b => a, a => c,
    // b => d). This has a rotate loop (a => b, b => a) and some fringe (a => c, b => d). We treat
    // the whole thing as a single rotate.
    //
    // We will find multiple disjoint such operations. We can execute them in any order.
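    //
    // Worked example (illustrative tmp names; all pairs assumed Width64): for the pairs
    // (a => b, b => a, a => c, b => d) we find one rotate whose loop is (a => b, b => a) and whose
    // fringe is (a => c, b => d). When Swap64 is available, the emitted sequence is:
    //
    //     Move b, d    // fringe first, while both sources still hold their original values
    //     Move a, c
    //     Swap64 a, b  // the two-element loop collapses to a single swap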

    // We interpret these as Moves that should be executed backwards. All shifts are keyed by their
    // starting source.
    HashMap<Arg, Vector<ShufflePair>> shifts;

    // We interpret these as Swaps over src()'s that should be executed backwards, i.e. for a list
    // of size 3 we would do "Swap list[1].src(), list[2].src(); Swap list[0].src(), list[1].src()".
    // Note that we actually can't do that if the widths don't match or other bad things happen.
    // But, prior to executing all of that, we need to execute the fringe: the shifts coming off the
    // rotate.
    Vector<Rotate> rotates;

    {
        HashMap<Arg, Vector<ShufflePair>> mapping;
        for (const ShufflePair& pair : pairs)
            mapping.add(pair.src(), Vector<ShufflePair>()).iterator->value.append(pair);

        Vector<ShufflePair> currentPairs;

        while (!mapping.isEmpty()) {
            ASSERT(currentPairs.isEmpty());
            Arg originalSrc = mapping.begin()->key;
            ASSERT(!shifts.contains(originalSrc));
            if (AirEmitShuffleInternal::verbose)
                dataLog("Processing from ", originalSrc, "\n");

            GraphNodeWorklist<Arg> worklist;
            worklist.push(originalSrc);
            while (Arg src = worklist.pop()) {
                HashMap<Arg, Vector<ShufflePair>>::iterator iter = mapping.find(src);
                if (iter == mapping.end()) {
                    // With a shift it's possible that we previously built the tail of this shift.
                    // See if that's the case now.
                    if (AirEmitShuffleInternal::verbose)
                        dataLog("Trying to append shift at ", src, "\n");
                    currentPairs.appendVector(shifts.take(src));
                    continue;
                }
                Vector<ShufflePair> pairs = WTFMove(iter->value);
                mapping.remove(iter);

                for (const ShufflePair& pair : pairs) {
                    currentPairs.append(pair);
                    ASSERT(pair.src() == src);
                    worklist.push(pair.dst());
                }
            }

            ASSERT(currentPairs.size());
            ASSERT(currentPairs[0].src() == originalSrc);

            if (AirEmitShuffleInternal::verbose)
                dataLog("currentPairs = ", listDump(currentPairs), "\n");

            bool isRotate = false;
            for (const ShufflePair& pair : currentPairs) {
                if (pair.dst() == originalSrc) {
                    isRotate = true;
                    break;
                }
            }

            if (isRotate) {
                if (AirEmitShuffleInternal::verbose)
                    dataLog("It's a rotate.\n");
                Rotate rotate;

                // The common case is that the rotate does not have fringe. The only way to
                // check for this is to examine the whole rotate.
                bool ok;
                if (currentPairs.last().dst() == originalSrc) {
                    ok = true;
                    for (unsigned i = currentPairs.size() - 1; i--;)
                        ok &= currentPairs[i].dst() == currentPairs[i + 1].src();
                } else
                    ok = false;

                if (ok)
                    rotate.loop = WTFMove(currentPairs);
                else {
                    // This is the slow path. The rotate has fringe.

                    HashMap<Arg, ShufflePair> dstMapping;
                    for (const ShufflePair& pair : currentPairs)
                        dstMapping.add(pair.dst(), pair);

                    ShufflePair pair = dstMapping.take(originalSrc);
                    for (;;) {
                        rotate.loop.append(pair);

                        auto iter = dstMapping.find(pair.src());
                        if (iter == dstMapping.end())
                            break;
                        pair = iter->value;
                        dstMapping.remove(iter);
                    }

                    rotate.loop.reverse();

                    // Make sure that the fringe appears in the same order as how it appeared in the
                    // currentPairs, since that's the DFS order.
                    for (const ShufflePair& pair : currentPairs) {
                        // But of course we only include it if it's not in the loop.
                        if (dstMapping.contains(pair.dst()))
                            rotate.fringe.append(pair);
                    }
                }

                // If the graph search terminates because we returned to the first source, then the
                // pair list has to have a very particular shape.
                for (unsigned i = rotate.loop.size() - 1; i--;)
                    ASSERT(rotate.loop[i].dst() == rotate.loop[i + 1].src());
                rotates.append(WTFMove(rotate));
                currentPairs.shrink(0);
            } else {
                if (AirEmitShuffleInternal::verbose)
                    dataLog("It's a shift.\n");
                shifts.add(originalSrc, WTFMove(currentPairs));
            }
        }
    }

    if (AirEmitShuffleInternal::verbose) {
        dataLog("Shifts:\n");
        for (auto& entry : shifts)
            dataLog("    ", entry.key, ": ", listDump(entry.value), "\n");
        dataLog("Rotates:\n");
        for (auto& rotate : rotates)
            dataLog("    loop = ", listDump(rotate.loop), ", fringe = ", listDump(rotate.fringe), "\n");
    }

    // In the worst case, we need two scratch registers. The way we do this is that the client passes
    // us whatever scratch registers it happens to have lying around. We will need scratch registers
    // in the following cases:
    //
    // - Shuffle pairs where both src and dst refer to memory.
    // - Rotate when no Swap instruction is available.
    //
    // Lucky for us, we are guaranteed to have extra scratch registers anytime we have a Shift that
    // ends with a register. We search for such a register right now.

    auto moveForWidth = [&] (Width width) -> Opcode {
        return moveFor(bank, width);
    };

    Opcode conservativeMove = moveForWidth(conservativeWidth(bank));

    // We will emit things in reverse. We maintain a list of packs of instructions, and then we
    // append them together in reverse (for example the thing at the end of resultPacks is placed
    // first). This is useful because the last thing we emit frees up its destination registers, so
    // it affects how we emit things before it.
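    //
    // A small sketch of that ordering (hypothetical packs, not in the original comments): if
    // resultPacks ends up as [P0, P1, P2], the final instruction order is P2, P1, P0. P0 is
    // generated first precisely so that it executes last; any pure destination it ends in is never
    // read earlier, so P1 and P2 may safely borrow it as scratch space.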
    Vector<Vector<Inst>> resultPacks;
    Vector<Inst> result;

    auto commitResult = [&] () {
        resultPacks.append(WTFMove(result));
    };

    auto getScratch = [&] (unsigned index, Tmp possibleScratch) -> Tmp {
        if (scratches[index].isTmp())
            return scratches[index].tmp();

        if (!possibleScratch)
            return Tmp();
        result.append(Inst(conservativeMove, origin, possibleScratch, scratches[index]));
        return possibleScratch;
    };

    auto returnScratch = [&] (unsigned index, Tmp tmp) {
        if (Arg(tmp) != scratches[index])
            result.append(Inst(conservativeMove, origin, scratches[index], tmp));
    };
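
    // The borrow protocol above, sketched (register and slot names are hypothetical): if
    // scratches[index] is a spill slot rather than a tmp, getScratch parks possibleScratch's
    // current value in the slot and hands the register out; returnScratch undoes it:
    //
    //     Move %possibleScratch, (slot)   // getScratch: save, then use %possibleScratch freely
    //     ...
    //     Move (slot), %possibleScratch   // returnScratch: restore the original value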

    auto handleShiftPair = [&] (const ShufflePair& pair, unsigned scratchIndex) {
        Opcode move = moveForWidth(pair.width());

        if (!isValidForm(move, pair.src().kind(), pair.dst().kind())) {
            Tmp scratch =
                getScratch(scratchIndex, findPossibleScratch(code, bank, pair.src(), pair.dst()));
            RELEASE_ASSERT(scratch);
            if (isValidForm(move, pair.src().kind(), Arg::Tmp))
                result.append(Inst(moveForWidth(pair.width()), origin, pair.src(), scratch));
            else {
                ASSERT(pair.src().isSomeImm());
                ASSERT(move == Move32);
                result.append(Inst(Move, origin, Arg::bigImm(pair.src().value()), scratch));
            }
            result.append(Inst(moveForWidth(pair.width()), origin, scratch, pair.dst()));
            returnScratch(scratchIndex, scratch);
            return;
        }

        result.append(Inst(move, origin, pair.src(), pair.dst()));
    };

    auto handleShift = [&] (Vector<ShufflePair>& shift) {
        // FIXME: We could optimize the spill behavior of the shifter by checking if any of the
        // shifts need spills. If they do, then we could try to get a register out here. Note that
        // this may fail where the current strategy succeeds: out here we need a register that does
        // not interfere with any of the shifts, while the current strategy only needs to find a
        // scratch register that does not interfere with a particular shift. So, this optimization
        // will be opportunistic: if it succeeds, then the individual shifts can use that scratch,
        // otherwise they will do what they do now.

        for (unsigned i = shift.size(); i--;)
            handleShiftPair(shift[i], 0);

        Arg lastDst = shift.last().dst();
        if (lastDst.isTmp()) {
            for (Arg& scratch : scratches) {
                ASSERT(scratch != lastDst);
                if (!scratch.isTmp()) {
                    scratch = lastDst;
                    break;
                }
            }
        }
    };

    // First handle shifts whose last destination is a tmp because these free up scratch registers.
    // These end up last in the final sequence, so the final destination of these shifts will be
    // available as a scratch location for anything emitted prior (so, after, since we're emitting in
    // reverse).
    for (auto& entry : shifts) {
        Vector<ShufflePair>& shift = entry.value;
        if (shift.last().dst().isTmp())
            handleShift(shift);
        commitResult();
    }

    // Now handle the rest of the shifts.
    for (auto& entry : shifts) {
        Vector<ShufflePair>& shift = entry.value;
        if (!shift.last().dst().isTmp())
            handleShift(shift);
        commitResult();
    }

    for (Rotate& rotate : rotates) {
        if (!rotate.fringe.isEmpty()) {
            // Make sure we do the fringe first! This won't clobber any of the registers that are
            // part of the rotation.
            handleShift(rotate.fringe);
        }

        bool canSwap = false;
        Opcode swap = Oops;
        Width swapWidth = Width8; // bogus value

        // Currently, the swap instruction is not available for floating point on any architecture we
        // support.
        if (bank == GP) {
            // Figure out whether we will be doing 64-bit swaps or 32-bit swaps. If we have a mix of
            // widths we handle that by fixing up the relevant register with zero-extends.
            swap = Swap32;
            swapWidth = Width32;
            bool hasMemory = false;
            bool hasIndex = false;
            for (ShufflePair& pair : rotate.loop) {
                switch (pair.width()) {
                case Width32:
                    break;
                case Width64:
                    swap = Swap64;
                    swapWidth = Width64;
                    break;
                default:
                    RELEASE_ASSERT_NOT_REACHED();
                    break;
                }

                hasMemory |= pair.src().isMemory() || pair.dst().isMemory();
                hasIndex |= pair.src().isIndex() || pair.dst().isIndex();
            }

            canSwap = isValidForm(swap, Arg::Tmp, Arg::Tmp);

            // We can totally use swaps even if there are shuffles involving memory. But, we play it
            // safe in that case. There are corner cases we don't handle, and our ability to do it is
            // contingent upon swap form availability.

            if (hasMemory) {
                canSwap &= isValidForm(swap, Arg::Tmp, Arg::Addr);

                // We don't take the swapping path if there is a mix of widths and some of the
                // shuffles involve memory. That gets too confusing. We might be able to relax this
                // to only bail if there are subwidth pairs involving memory, but I haven't thought
                // about it very hard. Anyway, this case is not common: rotates involving memory
                // don't arise for function calls, and they will only happen for rotates in user code
                // if some of the variables get spilled. It's hard to imagine a program that rotates
                // data around in variables while also doing a combination of uint32->uint64 and
                // int64->int32 casts.
                for (ShufflePair& pair : rotate.loop)
                    canSwap &= pair.width() == swapWidth;
            }

            if (hasIndex)
                canSwap &= isValidForm(swap, Arg::Tmp, Arg::Index);
        }

        if (canSwap) {
            for (unsigned i = rotate.loop.size() - 1; i--;) {
                Arg left = rotate.loop[i].src();
                Arg right = rotate.loop[i + 1].src();

                if (left.isMemory() && right.isMemory()) {
                    // Note that this is a super rare outcome. Rotates are rare. Spills are rare.
                    // Moving data between two spills is rare. To get here a lot of rare stuff has to
                    // all happen at once.

                    Tmp scratch = getScratch(0, findPossibleScratch(code, bank, left, right));
                    RELEASE_ASSERT(scratch);
                    result.append(Inst(moveForWidth(swapWidth), origin, left, scratch));
                    result.append(Inst(swap, origin, scratch, right));
                    result.append(Inst(moveForWidth(swapWidth), origin, scratch, left));
                    returnScratch(0, scratch);
                    continue;
                }

                if (left.isMemory())
                    std::swap(left, right);

                result.append(Inst(swap, origin, left, right));
            }

            for (ShufflePair pair : rotate.loop) {
                if (pair.width() == swapWidth)
                    continue;

                RELEASE_ASSERT(pair.width() == Width32);
                RELEASE_ASSERT(swapWidth == Width64);
                RELEASE_ASSERT(pair.dst().isTmp());

                // Need to do an extra zero extension.
                result.append(Inst(Move32, origin, pair.dst(), pair.dst()));
            }
        } else {
            // We can treat this as a shift so long as we take the last destination (i.e. first
            // source) and save it first. Then we handle the first entry in the pair in the rotate
            // specially, after we restore the last destination. This requires some special care to
            // find a scratch register. It's possible that we have a rotate that uses the entire
            // available register file.
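            //
            // A concrete sketch (hypothetical tmps a, b, c and scratch s; not in the original
            // comments): with no usable Swap form, the loop (a => b, b => c, c => a) lowers to:
            //
            //     Move a, s    // save the value that the loop's first destination will need
            //     Move c, a
            //     Move b, c
            //     Move s, b    // restore: b receives a's original value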

            Tmp scratch = findPossibleScratch(
                code, bank,
                [&] (Tmp tmp) -> bool {
                    for (ShufflePair pair : rotate.loop) {
                        if (pair.src().usesTmp(tmp))
                            return false;
                        if (pair.dst().usesTmp(tmp))
                            return false;
                    }
                    return true;
                });

            // NOTE: This is the most likely use of scratch registers.
            scratch = getScratch(0, scratch);

            // We may not have found a scratch register. When this happens, we can just use the spill
            // slot directly.
            Arg rotateSave = scratch ? Arg(scratch) : scratches[0];

            handleShiftPair(
                ShufflePair(rotate.loop.last().dst(), rotateSave, rotate.loop[0].width()), 1);

            for (unsigned i = rotate.loop.size(); i-- > 1;)
                handleShiftPair(rotate.loop[i], 1);

            handleShiftPair(
                ShufflePair(rotateSave, rotate.loop[0].dst(), rotate.loop[0].width()), 1);

            if (scratch)
                returnScratch(0, scratch);
        }

        commitResult();
    }

    ASSERT(result.isEmpty());

    for (unsigned i = resultPacks.size(); i--;)
        result.appendVector(resultPacks[i]);

    return result;
}

Vector<Inst> emitShuffle(
    Code& code, const Vector<ShufflePair>& pairs,
    const std::array<Arg, 2>& gpScratch, const std::array<Arg, 2>& fpScratch,
    Value* origin)
{
    Vector<ShufflePair> gpPairs;
    Vector<ShufflePair> fpPairs;
    for (const ShufflePair& pair : pairs) {
        switch (pair.bank()) {
        case GP:
            gpPairs.append(pair);
            break;
        case FP:
            fpPairs.append(pair);
            break;
        }
    }

    Vector<Inst> result;
    result.appendVector(emitShuffle(code, gpPairs, gpScratch, GP, origin));
    result.appendVector(emitShuffle(code, fpPairs, fpScratch, FP, origin));
    return result;
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)