| 1 | /* |
| 2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #pragma once |
| 27 | |
| 28 | #if ENABLE(B3_JIT) |
| 29 | |
| 30 | #include "B3Type.h" |
| 31 | #include "B3Width.h" |
| 32 | #include <wtf/Optional.h> |
| 33 | #include <wtf/StdLibExtras.h> |
| 34 | |
| 35 | namespace JSC { namespace B3 { |
| 36 | |
| 37 | // Warning: In B3, an Opcode is just one part of a Kind. Kind is used the way that an opcode |
| 38 | // would be used in simple IRs. See B3Kind.h. |
| 39 | |
| 40 | enum Opcode : uint8_t { |
| 41 | // A no-op that returns Void, useful for when you want to remove a value. |
| 42 | Nop, |
| 43 | |
| 44 | // Polymorphic identity, usable with any value type. |
| 45 | Identity, |
| 46 | |
| 47 | // This is an identity, but we prohibit the compiler from realizing this until the bitter end. This can |
| 48 | // be used to block reassociation and other compiler reasoning, if we find that it's wrong or |
| 49 | // unprofitable and we need an escape hatch. |
| 50 | Opaque, |
| 51 | |
| 52 | // Constants. Use the ConstValue* classes. Constants exist in the control flow, so that we can |
| 53 | // reason about where we would construct them. Large constants are expensive to create. |
| 54 | Const32, |
| 55 | Const64, |
| 56 | ConstDouble, |
| 57 | ConstFloat, |
| 58 | |
| 59 | // B3 supports non-SSA variables. These are accessed using Get and Set opcodes. Use the |
| 60 | // VariableValue class. It's a good idea to run fixSSA() to turn these into SSA. The |
| 61 | // optimizer will do that eventually, but if your input tends to use these opcodes, you |
| 62 | // should run fixSSA() directly before launching the optimizer. |
| 63 | Set, |
| 64 | Get, |
| 65 | |
| 66 | // Gets the base address of a StackSlot. |
| 67 | SlotBase, |
| 68 | |
| 69 | // The magical argument register. This is viewed as executing at the top of the program |
| 70 | // regardless of where in control flow you put it, and the compiler takes care to ensure that we |
| 71 | // don't clobber the value by register allocation or calls (either by saving the argument to the |
| 72 | // stack or preserving it in a callee-save register). Use the ArgumentRegValue class. The return |
| 73 | // type is either pointer() (for GPRs) or Double (for FPRs). |
| 74 | ArgumentReg, |
| 75 | |
| 76 | // The frame pointer. You can put this anywhere in control flow but it will always yield the |
| 77 | // frame pointer, with a caveat: if our compiler changes the frame pointer temporarily for some |
| 78 | // silly reason, the FramePointer intrinsic will return where the frame pointer *should* be not |
| 79 | // where it happens to be right now. |
| 80 | FramePointer, |
| 81 | |
| 82 | // Polymorphic math, usable with any value type. |
| 83 | Add, |
| 84 | Sub, |
| 85 | Mul, |
| 86 | Div, // All bets are off as to what will happen when you execute this for -2^31/-1 and x/0. |
| 87 | UDiv, |
| 88 | Mod, // All bets are off as to what will happen when you execute this for -2^31%-1 and x%0. |
| 89 | UMod, |
| 90 | |
| 91 | // Polymorphic negation. Note that we only need this for floating point, since integer negation |
| 92 | // is exactly like Sub(0, x). But that's not true for floating point. Sub(0, 0) is 0, while |
| 93 | // Neg(0) is -0. Also, we canonicalize Sub(0, x) into Neg(x) in case of integers. |
| 94 | Neg, |
| 95 | |
| 96 | // Integer math. |
| 97 | BitAnd, |
| 98 | BitOr, |
| 99 | BitXor, |
| 100 | Shl, |
| 101 | SShr, // Arithmetic Shift. |
| 102 | ZShr, // Logical Shift. |
| 103 | RotR, // Rotate Right. |
| 104 | RotL, // Rotate Left. |
| 105 | Clz, // Count leading zeros. |
| 106 | |
| 107 | // Floating point math. |
| 108 | Abs, |
| 109 | Ceil, |
| 110 | Floor, |
| 111 | Sqrt, |
| 112 | |
| 113 | // Casts and such. |
| 114 | // Bitwise Cast of Double->Int64 or Int64->Double |
| 115 | BitwiseCast, |
| 116 | // Takes and returns Int32: |
| 117 | SExt8, |
| 118 | SExt16, |
| 119 | // Takes Int32 and returns Int64: |
| 120 | SExt32, |
| 121 | ZExt32, |
| 122 | // Does a bitwise truncation of Int64->Int32 and Double->Float: |
| 123 | Trunc, |
| 124 | // Takes ints and returns floating point value. Note that we don't currently provide the opposite operation, |
| 125 | // because double-to-int conversions have weirdly different semantics on different platforms. Use |
| 126 | // a patchpoint if you need to do that. |
| 127 | IToD, |
| 128 | IToF, |
| 129 | // Convert between double and float. |
| 130 | FloatToDouble, |
| 131 | DoubleToFloat, |
| 132 | |
| 133 | // Polymorphic comparisons, usable with any value type. Returns int32 0 or 1. Note that "Not" |
| 134 | // is just Equal(x, 0), and "ToBoolean" is just NotEqual(x, 0). |
| 135 | Equal, |
| 136 | NotEqual, |
| 137 | LessThan, |
| 138 | GreaterThan, |
| 139 | LessEqual, |
| 140 | GreaterEqual, |
| 141 | |
| 142 | // Integer comparisons. Returns int32 0 or 1. |
| 143 | Above, |
| 144 | Below, |
| 145 | AboveEqual, |
| 146 | BelowEqual, |
| 147 | |
| 148 | // Unordered floating point compare: values are equal or either one is NaN. |
| 149 | EqualOrUnordered, |
| 150 | |
| 151 | // SSA form of conditional move. The first child is evaluated for truthiness. If true, the second child |
| 152 | // is returned. Otherwise, the third child is returned. |
| 153 | Select, |
| 154 | |
| 155 | // Memory loads. Opcode indicates how we load and the loaded type. These use MemoryValue. |
| 156 | // These return Int32: |
| 157 | Load8Z, |
| 158 | Load8S, |
| 159 | Load16Z, |
| 160 | Load16S, |
| 161 | // This returns whatever the return type is: |
| 162 | Load, |
| 163 | |
| 164 | // Memory stores. Opcode indicates how the value is stored. These use MemoryValue. |
| 165 | // These take an Int32 value: |
| 166 | Store8, |
| 167 | Store16, |
| 168 | // This is a polymorphic store for Int32, Int64, Float, and Double. |
| 169 | Store, |
| 170 | |
| 171 | // Atomic compare and swap that returns a boolean. May choose to do nothing and return false. You can |
| 172 | // usually assume that this is faster and results in less code than AtomicStrongCAS, though that's |
| 173 | // not necessarily true on Intel, if instruction selection does its job. Imagine that this opcode is |
| 174 | // as if you did this atomically: |
| 175 | // |
| 176 | // template<typename T> |
| 177 | // bool AtomicWeakCAS(T expectedValue, T newValue, T* ptr) |
| 178 | // { |
| 179 | // if (!rand()) |
| 180 | // return false; // Real world example of this: context switch on ARM while doing CAS. |
| 181 | // if (*ptr != expectedValue) |
| 182 | // return false; |
| 183 | // *ptr = newValue; |
| 184 | // return true; |
| 185 | // } |
| 186 | // |
| 187 | // Note that all atomics put the pointer last to be consistent with how loads and stores work. This |
| 188 | // is a goofy tradition, but it's harmless, and better than being inconsistent. |
| 189 | // |
| 190 | // Note that weak CAS has no fencing guarantees when it fails. This means that the following |
| 191 | // transformation is always valid: |
| 192 | // |
| 193 | // Before: |
| 194 | // |
| 195 | // Branch(AtomicWeakCAS(expected, new, ptr)) |
| 196 | // Successors: Then:#success, Else:#fail |
| 197 | // |
| 198 | // After: |
| 199 | // |
| 200 | // Branch(Equal(Load(ptr), expected)) |
| 201 | // Successors: Then:#attempt, Else:#fail |
| 202 | // BB#attempt: |
| 203 | // Branch(AtomicWeakCAS(expected, new, ptr)) |
| 204 | // Successors: Then:#success, Else:#fail |
| 205 | // |
| 206 | // Both kinds of CAS for non-canonical widths (Width8 and Width16) ignore the irrelevant bits of the |
| 207 | // input. |
| 208 | AtomicWeakCAS, |
| 209 | |
| 210 | // Atomic compare and swap that returns the old value. Does not have the nondeterminism of WeakCAS. |
| 211 | // This is a bit more code and a bit slower in some cases, though not by a lot. Imagine that this |
| 212 | // opcode is as if you did this atomically: |
| 213 | // |
| 214 | // template<typename T> |
| 215 | // T AtomicStrongCAS(T expectedValue, T newValue, T* ptr) |
| 216 | // { |
| 217 | // T oldValue = *ptr; |
| 218 | // if (oldValue == expectedValue) |
| 219 | // *ptr = newValue; |
| 220 | // return oldValue |
| 221 | // } |
| 222 | // |
| 223 | // AtomicStrongCAS sign-extends its result for subwidth operations. |
| 224 | // |
| 225 | // Note that AtomicWeakCAS and AtomicStrongCAS sort of have this kind of equivalence: |
| 226 | // |
| 227 | // AtomicWeakCAS(@exp, @new, @ptr) == Equal(AtomicStrongCAS(@exp, @new, @ptr), @exp) |
| 228 | // |
| 229 | // Assuming that the WeakCAS does not spuriously fail, of course. |
| 230 | AtomicStrongCAS, |
| 231 | |
| 232 | // Atomically ___ a memory location and return the old value. Syntax: |
| 233 | // |
| 234 | // @oldValue = AtomicXchg___(@operand, @ptr) |
| 235 | // |
| 236 | // For non-canonical widths (Width8 and Width16), these return sign-extended results and ignore the |
| 237 | // irrelevant bits of their inputs. |
| 238 | AtomicXchgAdd, |
| 239 | AtomicXchgAnd, |
| 240 | AtomicXchgOr, |
| 241 | AtomicXchgSub, |
| 242 | AtomicXchgXor, |
| 243 | |
| 244 | // FIXME: Maybe we should have AtomicXchgNeg. |
| 245 | // https://bugs.webkit.org/show_bug.cgi?id=169252 |
| 246 | |
| 247 | // Atomically exchange a value with a memory location. Syntax: |
| 248 | // |
| 249 | // @oldValue = AtomicXchg(@newValue, @ptr) |
| 250 | AtomicXchg, |
| 251 | |
| 252 | // Introduce an invisible dependency for blocking motion of loads with respect to each other. Syntax: |
| 253 | // |
| 254 | // @result = Depend(@phantom) |
| 255 | // |
| 256 | // This is eventually codegenerated to have local semantics as if we did: |
| 257 | // |
| 258 | // @result = $0 |
| 259 | // |
| 260 | // But it ensures that the users of @result cannot execute until @phantom is computed. |
| 261 | // |
| 262 | // The compiler is not allowed to reason about the fact that Depend codegenerates this way. Any kind |
| 263 | // of transformation or analysis that relies on the insight that Depend is really zero is unsound, |
| 264 | // because it unlocks reordering of users of @result and @phantom. |
| 265 | // |
| 266 | // On X86, this is lowered to a load-load fence and @result folds to zero. |
| 267 | // |
| 268 | // On ARM, this is lowered as if like: |
| 269 | // |
| 270 | // @result = BitXor(@phantom, @phantom) |
| 271 | // |
| 272 | // Except that the compiler never gets an opportunity to simplify out the BitXor. |
| 273 | Depend, |
| 274 | |
| 275 | // This is used to compute the actual address of a Wasm memory operation. It takes an IntPtr |
| 276 | // and a pinned register then computes the appropriate IntPtr address. For the use-case of |
| 277 | // Wasm it is important that the first child initially be a ZExt32 so the top bits are cleared. |
| 278 | // We do WasmAddress(ZExt32(ptr), ...) so that we can avoid generating extraneous moves in Air. |
| 279 | WasmAddress, |
| 280 | |
| 281 | // This is used to represent standalone fences - i.e. fences that are not part of other |
| 282 | // instructions. It's expressive enough to expose mfence on x86 and dmb ish/ishst on ARM. On |
| 283 | // x86, it also acts as a compiler store-store fence in those cases where it would have been a |
| 284 | // dmb ishst on ARM. |
| 285 | Fence, |
| 286 | |
| 287 | // This is a regular ordinary C function call, using the system C calling convention. Make sure |
| 288 | // that the arguments are passed using the right types. The first argument is the callee. |
| 289 | CCall, |
| 290 | |
| 291 | // This is a patchpoint. Use the PatchpointValue class. This is viewed as behaving like a call, |
| 292 | // but only emits code via a code generation callback. That callback gets to emit code inline. |
| 293 | // You can pass a stackmap along with constraints on how each stackmap argument must be passed. |
| 294 | // It's legal to request that a stackmap argument is in some register and it's legal to request |
| 295 | // that a stackmap argument is at some offset from the top of the argument passing area on the |
| 296 | // stack. |
| 297 | Patchpoint, |
| 298 | |
| 299 | // Checked math. Use the CheckValue class. Like a Patchpoint, this takes a code generation |
| 300 | // callback. That callback gets to emit some code after the epilogue, and gets to link the jump |
| 301 | // from the check, and the choice of registers. You also get to supply a stackmap. Note that you |
| 302 | // are not allowed to jump back into the mainline code from your slow path, since the compiler |
| 303 | // will assume that the execution of these instructions proves that overflow didn't happen. For |
| 304 | // example, if you have two CheckAdd's: |
| 305 | // |
| 306 | // a = CheckAdd(x, y) |
| 307 | // b = CheckAdd(x, y) |
| 308 | // |
| 309 | // Then it's valid to change this to: |
| 310 | // |
| 311 | // a = CheckAdd(x, y) |
| 312 | // b = Identity(a) |
| 313 | // |
| 314 | // This is valid regardless of the callbacks used by the two CheckAdds. They may have different |
| 315 | // callbacks. Yet, this transformation is valid even if they are different because we know that |
| 316 | // after the first CheckAdd executes, the second CheckAdd could not have possibly taken slow |
| 317 | // path. Therefore, the second CheckAdd's callback is irrelevant. |
| 318 | // |
| 319 | // Note that the first two children of these operations have ValueRep's as input constraints but do |
| 320 | // not have output constraints. |
| 321 | CheckAdd, |
| 322 | CheckSub, |
| 323 | CheckMul, |
| 324 | |
| 325 | // Check that side-exits. Use the CheckValue class. Like CheckAdd and friends, this has a |
| 326 | // stackmap with a generation callback. This takes an int argument that this branches on, with |
| 327 | // full branch fusion in the instruction selector. A true value jumps to the generator's slow |
| 328 | // path. Note that the predicate child is has both an input ValueRep. The input constraint must be |
| 329 | // WarmAny. It will not have an output constraint. |
| 330 | Check, |
| 331 | |
| 332 | // Special Wasm opcode that takes a Int32, a special pinned gpr and an offset. This node exists |
| 333 | // to allow us to CSE WasmBoundsChecks if both use the same pointer and one dominates the other. |
| 334 | // Without some such node B3 would not have enough information about the inner workings of wasm |
| 335 | // to be able to perform such optimizations. |
| 336 | WasmBoundsCheck, |
| 337 | |
| 338 | // SSA support, in the style of DFG SSA. |
| 339 | Upsilon, // This uses the UpsilonValue class. |
| 340 | Phi, |
| 341 | |
| 342 | // Jump. |
| 343 | Jump, |
| 344 | |
| 345 | // Polymorphic branch, usable with any integer type. Branches if not equal to zero. The 0-index |
| 346 | // successor is the true successor. |
| 347 | Branch, |
| 348 | |
| 349 | // Switch. Switches over either Int32 or Int64. Uses the SwitchValue class. |
| 350 | Switch, |
| 351 | |
| 352 | // Multiple entrypoints are supported via the EntrySwitch operation. Place this in the root |
| 353 | // block and list the entrypoints as the successors. All blocks backwards-reachable from |
| 354 | // EntrySwitch are duplicated for each entrypoint. |
| 355 | EntrySwitch, |
| 356 | |
| 357 | // Return. Note that B3 procedures don't know their return type, so this can just return any |
| 358 | // type. |
| 359 | Return, |
| 360 | |
| 361 | // This is a terminal that indicates that we will never get here. |
| 362 | Oops |
| 363 | }; |
| 364 | |
| 365 | inline bool isCheckMath(Opcode opcode) |
| 366 | { |
| 367 | switch (opcode) { |
| 368 | case CheckAdd: |
| 369 | case CheckSub: |
| 370 | case CheckMul: |
| 371 | return true; |
| 372 | default: |
| 373 | return false; |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | Optional<Opcode> invertedCompare(Opcode, Type); |
| 378 | |
| 379 | inline Opcode constPtrOpcode() |
| 380 | { |
| 381 | if (is64Bit()) |
| 382 | return Const64; |
| 383 | return Const32; |
| 384 | } |
| 385 | |
| 386 | inline bool isConstant(Opcode opcode) |
| 387 | { |
| 388 | switch (opcode) { |
| 389 | case Const32: |
| 390 | case Const64: |
| 391 | case ConstDouble: |
| 392 | case ConstFloat: |
| 393 | return true; |
| 394 | default: |
| 395 | return false; |
| 396 | } |
| 397 | } |
| 398 | |
| 399 | inline Opcode opcodeForConstant(Type type) |
| 400 | { |
| 401 | switch (type) { |
| 402 | case Int32: return Const32; |
| 403 | case Int64: return Const64; |
| 404 | case Float: return ConstFloat; |
| 405 | case Double: return ConstDouble; |
| 406 | default: |
| 407 | RELEASE_ASSERT_NOT_REACHED(); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | inline bool isDefinitelyTerminal(Opcode opcode) |
| 412 | { |
| 413 | switch (opcode) { |
| 414 | case Jump: |
| 415 | case Branch: |
| 416 | case Switch: |
| 417 | case Oops: |
| 418 | case Return: |
| 419 | return true; |
| 420 | default: |
| 421 | return false; |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | inline bool isLoad(Opcode opcode) |
| 426 | { |
| 427 | switch (opcode) { |
| 428 | case Load8Z: |
| 429 | case Load8S: |
| 430 | case Load16Z: |
| 431 | case Load16S: |
| 432 | case Load: |
| 433 | return true; |
| 434 | default: |
| 435 | return false; |
| 436 | } |
| 437 | } |
| 438 | |
| 439 | inline bool isStore(Opcode opcode) |
| 440 | { |
| 441 | switch (opcode) { |
| 442 | case Store8: |
| 443 | case Store16: |
| 444 | case Store: |
| 445 | return true; |
| 446 | default: |
| 447 | return false; |
| 448 | } |
| 449 | } |
| 450 | |
| 451 | inline bool isLoadStore(Opcode opcode) |
| 452 | { |
| 453 | switch (opcode) { |
| 454 | case Load8Z: |
| 455 | case Load8S: |
| 456 | case Load16Z: |
| 457 | case Load16S: |
| 458 | case Load: |
| 459 | case Store8: |
| 460 | case Store16: |
| 461 | case Store: |
| 462 | return true; |
| 463 | default: |
| 464 | return false; |
| 465 | } |
| 466 | } |
| 467 | |
| 468 | inline bool isAtomic(Opcode opcode) |
| 469 | { |
| 470 | switch (opcode) { |
| 471 | case AtomicWeakCAS: |
| 472 | case AtomicStrongCAS: |
| 473 | case AtomicXchgAdd: |
| 474 | case AtomicXchgAnd: |
| 475 | case AtomicXchgOr: |
| 476 | case AtomicXchgSub: |
| 477 | case AtomicXchgXor: |
| 478 | case AtomicXchg: |
| 479 | return true; |
| 480 | default: |
| 481 | return false; |
| 482 | } |
| 483 | } |
| 484 | |
| 485 | inline bool isAtomicCAS(Opcode opcode) |
| 486 | { |
| 487 | switch (opcode) { |
| 488 | case AtomicWeakCAS: |
| 489 | case AtomicStrongCAS: |
| 490 | return true; |
| 491 | default: |
| 492 | return false; |
| 493 | } |
| 494 | } |
| 495 | |
| 496 | inline bool isAtomicXchg(Opcode opcode) |
| 497 | { |
| 498 | switch (opcode) { |
| 499 | case AtomicXchgAdd: |
| 500 | case AtomicXchgAnd: |
| 501 | case AtomicXchgOr: |
| 502 | case AtomicXchgSub: |
| 503 | case AtomicXchgXor: |
| 504 | case AtomicXchg: |
| 505 | return true; |
| 506 | default: |
| 507 | return false; |
| 508 | } |
| 509 | } |
| 510 | |
| 511 | inline bool isMemoryAccess(Opcode opcode) |
| 512 | { |
| 513 | return isAtomic(opcode) || isLoadStore(opcode); |
| 514 | } |
| 515 | |
| 516 | inline Opcode signExtendOpcode(Width width) |
| 517 | { |
| 518 | switch (width) { |
| 519 | case Width8: |
| 520 | return SExt8; |
| 521 | case Width16: |
| 522 | return SExt16; |
| 523 | default: |
| 524 | RELEASE_ASSERT_NOT_REACHED(); |
| 525 | return Oops; |
| 526 | } |
| 527 | } |
| 528 | |
| 529 | JS_EXPORT_PRIVATE Opcode storeOpcode(Bank bank, Width width); |
| 530 | |
| 531 | } } // namespace JSC::B3 |
| 532 | |
| 533 | namespace WTF { |
| 534 | |
| 535 | class PrintStream; |
| 536 | |
| 537 | JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Opcode); |
| 538 | |
| 539 | } // namespace WTF |
| 540 | |
| 541 | #endif // ENABLE(B3_JIT) |
| 542 | |