| 1 | /* |
| 2 | * Copyright (C) 2013-2018 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | |
| 28 | #if ENABLE(ASSEMBLER) && (CPU(X86) || CPU(X86_64)) |
| 29 | #include "MacroAssembler.h" |
| 30 | |
| 31 | #include "ProbeContext.h" |
| 32 | #include <wtf/InlineASM.h> |
| 33 | |
| 34 | #if COMPILER(MSVC) |
| 35 | #include <intrin.h> |
| 36 | #endif |
| 37 | |
| 38 | namespace JSC { |
| 39 | |
| 40 | #if ENABLE(MASM_PROBE) |
| 41 | |
| 42 | extern "C" void ctiMasmProbeTrampoline(); |
| 43 | |
| 44 | // The following are offsets for Probe::State fields accessed by the ctiMasmProbeTrampoline stub. |
// These are preprocessor macros: the trampolines below paste them into assembly text via
// STRINGIZE_VALUE_OF (GCC-compatible compilers) or use them as __asm operands (MSVC).
// Layout described by the offsets: four pointer-sized header fields, the GPRs, the SPRs
// (eip, eflags), and then one XMM_SIZE-byte slot per FPR.
| 45 | |
| 46 | #if CPU(X86) |
| 47 | #define PTR_SIZE 4 |
| 48 | #else // CPU(X86_64) |
| 49 | #define PTR_SIZE 8 |
| 50 | #endif |
| 51 | |
| 52 | #define PROBE_PROBE_FUNCTION_OFFSET (0 * PTR_SIZE) |
| 53 | #define PROBE_ARG_OFFSET (1 * PTR_SIZE) |
| 54 | #define PROBE_INIT_STACK_FUNCTION_OFFSET (2 * PTR_SIZE) |
| 55 | #define PROBE_INIT_STACK_ARG_OFFSET (3 * PTR_SIZE) |
| 56 | |
| 57 | #define PROBE_FIRST_GPR_OFFSET (4 * PTR_SIZE) |
| 58 | #define PROBE_CPU_EAX_OFFSET (PROBE_FIRST_GPR_OFFSET + (0 * PTR_SIZE)) |
| 59 | #define PROBE_CPU_ECX_OFFSET (PROBE_FIRST_GPR_OFFSET + (1 * PTR_SIZE)) |
| 60 | #define PROBE_CPU_EDX_OFFSET (PROBE_FIRST_GPR_OFFSET + (2 * PTR_SIZE)) |
| 61 | #define PROBE_CPU_EBX_OFFSET (PROBE_FIRST_GPR_OFFSET + (3 * PTR_SIZE)) |
| 62 | #define PROBE_CPU_ESP_OFFSET (PROBE_FIRST_GPR_OFFSET + (4 * PTR_SIZE)) |
| 63 | #define PROBE_CPU_EBP_OFFSET (PROBE_FIRST_GPR_OFFSET + (5 * PTR_SIZE)) |
| 64 | #define PROBE_CPU_ESI_OFFSET (PROBE_FIRST_GPR_OFFSET + (6 * PTR_SIZE)) |
| 65 | #define PROBE_CPU_EDI_OFFSET (PROBE_FIRST_GPR_OFFSET + (7 * PTR_SIZE)) |
| 66 | |
| 67 | #if CPU(X86) |
| 68 | #define PROBE_FIRST_SPR_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE)) |
| 69 | #else // CPU(X86_64) |
| 70 | #define PROBE_CPU_R8_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE)) |
| 71 | #define PROBE_CPU_R9_OFFSET (PROBE_FIRST_GPR_OFFSET + (9 * PTR_SIZE)) |
| 72 | #define PROBE_CPU_R10_OFFSET (PROBE_FIRST_GPR_OFFSET + (10 * PTR_SIZE)) |
| 73 | #define PROBE_CPU_R11_OFFSET (PROBE_FIRST_GPR_OFFSET + (11 * PTR_SIZE)) |
| 74 | #define PROBE_CPU_R12_OFFSET (PROBE_FIRST_GPR_OFFSET + (12 * PTR_SIZE)) |
| 75 | #define PROBE_CPU_R13_OFFSET (PROBE_FIRST_GPR_OFFSET + (13 * PTR_SIZE)) |
| 76 | #define PROBE_CPU_R14_OFFSET (PROBE_FIRST_GPR_OFFSET + (14 * PTR_SIZE)) |
| 77 | #define PROBE_CPU_R15_OFFSET (PROBE_FIRST_GPR_OFFSET + (15 * PTR_SIZE)) |
| 78 | #define PROBE_FIRST_SPR_OFFSET (PROBE_FIRST_GPR_OFFSET + (16 * PTR_SIZE)) |
| 79 | #endif // CPU(X86_64) |
| 80 | |
| 81 | #define PROBE_CPU_EIP_OFFSET (PROBE_FIRST_SPR_OFFSET + (0 * PTR_SIZE)) |
| 82 | #define PROBE_CPU_EFLAGS_OFFSET (PROBE_FIRST_SPR_OFFSET + (1 * PTR_SIZE)) |
| 83 | #define PROBE_FIRST_XMM_OFFSET (PROBE_FIRST_SPR_OFFSET + (2 * PTR_SIZE)) |
| 84 | |
// Each FPR slot is 8 bytes; the trampolines save and restore each xmm register with a 64-bit movq.
| 85 | #define XMM_SIZE 8 |
| 86 | #define PROBE_CPU_XMM0_OFFSET (PROBE_FIRST_XMM_OFFSET + (0 * XMM_SIZE)) |
| 87 | #define PROBE_CPU_XMM1_OFFSET (PROBE_FIRST_XMM_OFFSET + (1 * XMM_SIZE)) |
| 88 | #define PROBE_CPU_XMM2_OFFSET (PROBE_FIRST_XMM_OFFSET + (2 * XMM_SIZE)) |
| 89 | #define PROBE_CPU_XMM3_OFFSET (PROBE_FIRST_XMM_OFFSET + (3 * XMM_SIZE)) |
| 90 | #define PROBE_CPU_XMM4_OFFSET (PROBE_FIRST_XMM_OFFSET + (4 * XMM_SIZE)) |
| 91 | #define PROBE_CPU_XMM5_OFFSET (PROBE_FIRST_XMM_OFFSET + (5 * XMM_SIZE)) |
| 92 | #define PROBE_CPU_XMM6_OFFSET (PROBE_FIRST_XMM_OFFSET + (6 * XMM_SIZE)) |
| 93 | #define PROBE_CPU_XMM7_OFFSET (PROBE_FIRST_XMM_OFFSET + (7 * XMM_SIZE)) |
| 94 | |
// PROBE_SIZE is the offset just past the last XMM slot (xmm7 on X86, xmm15 on X86_64).
| 95 | #if CPU(X86) |
| 96 | #define PROBE_SIZE (PROBE_CPU_XMM7_OFFSET + XMM_SIZE) |
| 97 | #else // CPU(X86_64) |
| 98 | #define PROBE_CPU_XMM8_OFFSET (PROBE_FIRST_XMM_OFFSET + (8 * XMM_SIZE)) |
| 99 | #define PROBE_CPU_XMM9_OFFSET (PROBE_FIRST_XMM_OFFSET + (9 * XMM_SIZE)) |
| 100 | #define PROBE_CPU_XMM10_OFFSET (PROBE_FIRST_XMM_OFFSET + (10 * XMM_SIZE)) |
| 101 | #define PROBE_CPU_XMM11_OFFSET (PROBE_FIRST_XMM_OFFSET + (11 * XMM_SIZE)) |
| 102 | #define PROBE_CPU_XMM12_OFFSET (PROBE_FIRST_XMM_OFFSET + (12 * XMM_SIZE)) |
| 103 | #define PROBE_CPU_XMM13_OFFSET (PROBE_FIRST_XMM_OFFSET + (13 * XMM_SIZE)) |
| 104 | #define PROBE_CPU_XMM14_OFFSET (PROBE_FIRST_XMM_OFFSET + (14 * XMM_SIZE)) |
| 105 | #define PROBE_CPU_XMM15_OFFSET (PROBE_FIRST_XMM_OFFSET + (15 * XMM_SIZE)) |
| 106 | #define PROBE_SIZE (PROBE_CPU_XMM15_OFFSET + XMM_SIZE) |
| 107 | #endif // CPU(X86_64) |
| 108 | |
| 109 | #define PROBE_EXECUTOR_OFFSET PROBE_SIZE // Stash the executeProbe function pointer just past the end of the Probe::State. |
| 110 | |
| 111 | // The outgoing record to be popped off the stack at the end consists of: |
| 112 | // eflags, eax, ecx, ebp, eip. |
// The trampolines reserve PROBE_SIZE + OUT_SIZE bytes of stack, so the executor slot
// above falls inside that reservation.
| 113 | #define OUT_SIZE (5 * PTR_SIZE) |
| 114 | |
| 115 | // These ASSERTs remind you that if you change the layout of Probe::State, |
| 116 | // you need to change ctiMasmProbeTrampoline offsets above to match. |
// PROBE_OFFSETOF(x): byte offset of member x within Probe::State.
| 117 | #define PROBE_OFFSETOF(x) offsetof(struct Probe::State, x) |
// PROBE_OFFSETOF_REG(x, reg): byte offset of element `reg` of array member x within Probe::State.
// The expansion and the `reg` argument are parenthesized so the macro composes safely
// inside larger expressions.
| 118 | #define PROBE_OFFSETOF_REG(x, reg) (offsetof(struct Probe::State, x) + (reg) * sizeof((reinterpret_cast<Probe::State*>(0))->x[reg])) |
| 119 | static_assert(PROBE_OFFSETOF(probeFunction) == PROBE_PROBE_FUNCTION_OFFSET, "Probe::State::probeFunction's offset matches ctiMasmProbeTrampoline" ); |
| 120 | static_assert(PROBE_OFFSETOF(arg) == PROBE_ARG_OFFSET, "Probe::State::arg's offset matches ctiMasmProbeTrampoline" ); |
| 121 | static_assert(PROBE_OFFSETOF(initializeStackFunction) == PROBE_INIT_STACK_FUNCTION_OFFSET, "Probe::State::initializeStackFunction's offset matches ctiMasmProbeTrampoline" ); |
| 122 | static_assert(PROBE_OFFSETOF(initializeStackArg) == PROBE_INIT_STACK_ARG_OFFSET, "Probe::State::initializeStackArg's offset matches ctiMasmProbeTrampoline" ); |
| 123 | |
| 124 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::eax) == PROBE_CPU_EAX_OFFSET, "Probe::State::cpu.gprs[eax]'s offset matches ctiMasmProbeTrampoline" ); |
| 125 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ecx) == PROBE_CPU_ECX_OFFSET, "Probe::State::cpu.gprs[ecx]'s offset matches ctiMasmProbeTrampoline" ); |
| 126 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::edx) == PROBE_CPU_EDX_OFFSET, "Probe::State::cpu.gprs[edx]'s offset matches ctiMasmProbeTrampoline" ); |
| 127 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ebx) == PROBE_CPU_EBX_OFFSET, "Probe::State::cpu.gprs[ebx]'s offset matches ctiMasmProbeTrampoline" ); |
| 128 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::esp) == PROBE_CPU_ESP_OFFSET, "Probe::State::cpu.gprs[esp]'s offset matches ctiMasmProbeTrampoline" ); |
| 129 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::ebp) == PROBE_CPU_EBP_OFFSET, "Probe::State::cpu.gprs[ebp]'s offset matches ctiMasmProbeTrampoline" ); |
| 130 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::esi) == PROBE_CPU_ESI_OFFSET, "Probe::State::cpu.gprs[esi]'s offset matches ctiMasmProbeTrampoline" ); |
| 131 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::edi) == PROBE_CPU_EDI_OFFSET, "Probe::State::cpu.gprs[edi]'s offset matches ctiMasmProbeTrampoline" ); |
// eip and eflags live in the sprs array, not gprs; the messages name the array actually checked.
| 132 | static_assert(PROBE_OFFSETOF_REG(cpu.sprs, X86Registers::eip) == PROBE_CPU_EIP_OFFSET, "Probe::State::cpu.sprs[eip]'s offset matches ctiMasmProbeTrampoline" ); |
| 133 | static_assert(PROBE_OFFSETOF_REG(cpu.sprs, X86Registers::eflags) == PROBE_CPU_EFLAGS_OFFSET, "Probe::State::cpu.sprs[eflags]'s offset matches ctiMasmProbeTrampoline" ); |
| 134 | |
| 135 | #if CPU(X86_64) |
| 136 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r8) == PROBE_CPU_R8_OFFSET, "Probe::State::cpu.gprs[r8]'s offset matches ctiMasmProbeTrampoline" ); |
| 137 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r9) == PROBE_CPU_R9_OFFSET, "Probe::State::cpu.gprs[r9]'s offset matches ctiMasmProbeTrampoline" ); |
| 138 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r10) == PROBE_CPU_R10_OFFSET, "Probe::State::cpu.gprs[r10]'s offset matches ctiMasmProbeTrampoline" ); |
| 139 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r11) == PROBE_CPU_R11_OFFSET, "Probe::State::cpu.gprs[r11]'s offset matches ctiMasmProbeTrampoline" ); |
| 140 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r12) == PROBE_CPU_R12_OFFSET, "Probe::State::cpu.gprs[r12]'s offset matches ctiMasmProbeTrampoline" ); |
| 141 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r13) == PROBE_CPU_R13_OFFSET, "Probe::State::cpu.gprs[r13]'s offset matches ctiMasmProbeTrampoline" ); |
| 142 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r14) == PROBE_CPU_R14_OFFSET, "Probe::State::cpu.gprs[r14]'s offset matches ctiMasmProbeTrampoline" ); |
| 143 | static_assert(PROBE_OFFSETOF_REG(cpu.gprs, X86Registers::r15) == PROBE_CPU_R15_OFFSET, "Probe::State::cpu.gprs[r15]'s offset matches ctiMasmProbeTrampoline" ); |
| 144 | #endif // CPU(X86_64) |
| 145 | |
| 146 | static_assert(!(PROBE_CPU_XMM0_OFFSET & 0x7), "Probe::State::cpu.fprs[xmm0]'s offset should be 8 byte aligned" ); |
| 147 | |
| 148 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm0) == PROBE_CPU_XMM0_OFFSET, "Probe::State::cpu.fprs[xmm0]'s offset matches ctiMasmProbeTrampoline" ); |
| 149 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm1) == PROBE_CPU_XMM1_OFFSET, "Probe::State::cpu.fprs[xmm1]'s offset matches ctiMasmProbeTrampoline" ); |
| 150 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm2) == PROBE_CPU_XMM2_OFFSET, "Probe::State::cpu.fprs[xmm2]'s offset matches ctiMasmProbeTrampoline" ); |
| 151 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm3) == PROBE_CPU_XMM3_OFFSET, "Probe::State::cpu.fprs[xmm3]'s offset matches ctiMasmProbeTrampoline" ); |
| 152 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm4) == PROBE_CPU_XMM4_OFFSET, "Probe::State::cpu.fprs[xmm4]'s offset matches ctiMasmProbeTrampoline" ); |
| 153 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm5) == PROBE_CPU_XMM5_OFFSET, "Probe::State::cpu.fprs[xmm5]'s offset matches ctiMasmProbeTrampoline" ); |
| 154 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm6) == PROBE_CPU_XMM6_OFFSET, "Probe::State::cpu.fprs[xmm6]'s offset matches ctiMasmProbeTrampoline" ); |
| 155 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm7) == PROBE_CPU_XMM7_OFFSET, "Probe::State::cpu.fprs[xmm7]'s offset matches ctiMasmProbeTrampoline" ); |
| 156 | |
| 157 | #if CPU(X86_64) |
| 158 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm8) == PROBE_CPU_XMM8_OFFSET, "Probe::State::cpu.fprs[xmm8]'s offset matches ctiMasmProbeTrampoline" ); |
| 159 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm9) == PROBE_CPU_XMM9_OFFSET, "Probe::State::cpu.fprs[xmm9]'s offset matches ctiMasmProbeTrampoline" ); |
| 160 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm10) == PROBE_CPU_XMM10_OFFSET, "Probe::State::cpu.fprs[xmm10]'s offset matches ctiMasmProbeTrampoline" ); |
| 161 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm11) == PROBE_CPU_XMM11_OFFSET, "Probe::State::cpu.fprs[xmm11]'s offset matches ctiMasmProbeTrampoline" ); |
| 162 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm12) == PROBE_CPU_XMM12_OFFSET, "Probe::State::cpu.fprs[xmm12]'s offset matches ctiMasmProbeTrampoline" ); |
| 163 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm13) == PROBE_CPU_XMM13_OFFSET, "Probe::State::cpu.fprs[xmm13]'s offset matches ctiMasmProbeTrampoline" ); |
| 164 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm14) == PROBE_CPU_XMM14_OFFSET, "Probe::State::cpu.fprs[xmm14]'s offset matches ctiMasmProbeTrampoline" ); |
| 165 | static_assert(PROBE_OFFSETOF_REG(cpu.fprs, X86Registers::xmm15) == PROBE_CPU_XMM15_OFFSET, "Probe::State::cpu.fprs[xmm15]'s offset matches ctiMasmProbeTrampoline" ); |
| 166 | #endif // CPU(X86_64) |
| 167 | |
| 168 | static_assert(sizeof(Probe::State) == PROBE_SIZE, "Probe::State's size matches ctiMasmProbeTrampoline" ); |
// The executor pointer is stashed at PROBE_EXECUTOR_OFFSET, in the OUT_SIZE area that follows the Probe::State.
| 169 | static_assert((PROBE_EXECUTOR_OFFSET + PTR_SIZE) <= (PROBE_SIZE + OUT_SIZE), "Must have room after Probe::State to stash the probe handler" ); |
| 170 | |
| 171 | #undef PROBE_OFFSETOF |
| 172 | |
| 173 | #if CPU(X86) |
| 174 | #if COMPILER(GCC_COMPATIBLE) |
// ctiMasmProbeTrampoline (32-bit x86, GCC-compatible compilers).
// Captures the CPU state into a Probe::State carved out of the stack, calls the executor
// received in ecx with a pointer to that state, calls the state's initializeStackFunction
// if one is set, then reloads every register (including esp, eip and eflags) from the
// state and returns to the recorded eip.
| 175 | asm ( |
| 176 | ".globl " SYMBOL_STRING(ctiMasmProbeTrampoline) "\n" |
| 177 | HIDE_SYMBOL(ctiMasmProbeTrampoline) "\n" |
| 178 | SYMBOL_STRING(ctiMasmProbeTrampoline) ":" "\n" |
| 179 | |
| 180 | "pushfl" "\n" |
| 181 | |
| 182 | // MacroAssemblerX86Common::probe() has already generated code to store some values. |
| 183 | // Together with the eflags pushed above, the top of stack now looks like |
| 184 | // this: |
| 185 | // esp[0 * ptrSize]: eflags |
| 186 | // esp[1 * ptrSize]: return address / saved eip |
| 187 | // esp[2 * ptrSize]: saved ebx |
| 188 | // esp[3 * ptrSize]: saved edx |
| 189 | // esp[4 * ptrSize]: saved ecx |
| 190 | // esp[5 * ptrSize]: saved eax |
| 191 | // |
| 192 | // Incoming registers contain: |
| 193 | // ecx: Probe::executeProbe |
| 194 | // edx: probe function |
| 195 | // ebx: probe arg |
| 196 | // eax: scratch (was ctiMasmProbeTrampoline) |
| 197 | |
// eax keeps pointing at the incoming record (eflags at offset 0) while esp is moved
// down to make room for the Probe::State plus the outgoing record.
| 198 | "movl %esp, %eax" "\n" |
| 199 | "subl $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %esp" "\n" |
| 200 | |
| 201 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes. |
| 202 | "andl $~0x1f, %esp" "\n" |
| 203 | |
// ebp is saved first because it is about to be repurposed as the Probe::State pointer.
| 204 | "movl %ebp, " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%esp)" "\n" |
| 205 | "movl %esp, %ebp" "\n" // Save the Probe::State*. |
| 206 | |
| 207 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%ebp)" "\n" |
| 208 | "movl %edx, " STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%ebp)" "\n" |
| 209 | "movl %ebx, " STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "(%ebp)" "\n" |
| 210 | "movl %esi, " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%ebp)" "\n" |
| 211 | "movl %edi, " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%ebp)" "\n" |
| 212 | |
// Copy the six words of the incoming record into their Probe::State slots, using ecx as scratch.
| 213 | "movl 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 214 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%ebp)" "\n" |
| 215 | "movl 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 216 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%ebp)" "\n" |
| 217 | "movl 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 218 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%ebp)" "\n" |
| 219 | "movl 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 220 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%ebp)" "\n" |
| 221 | "movl 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 222 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%ebp)" "\n" |
| 223 | "movl 5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n" |
| 224 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%ebp)" "\n" |
| 225 | |
// The esp recorded in the state is the address just above the six-word incoming record.
| 226 | "movl %eax, %ecx" "\n" |
| 227 | "addl $" STRINGIZE_VALUE_OF(6 * PTR_SIZE) ", %ecx" "\n" |
| 228 | "movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp)" "\n" |
| 229 | |
| 230 | "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp)" "\n" |
| 231 | "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp)" "\n" |
| 232 | "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp)" "\n" |
| 233 | "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp)" "\n" |
| 234 | "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp)" "\n" |
| 235 | "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp)" "\n" |
| 236 | "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp)" "\n" |
| 237 | "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp)" "\n" |
| 238 | |
| 239 | // Reserve stack space for the arg while maintaining the required stack |
| 240 | // pointer 32 byte alignment: |
| 241 | "subl $0x20, %esp" "\n" |
| 242 | "movl %ebp, 0(%esp)" "\n" // the Probe::State* arg. |
| 243 | |
// Call the executor that was stashed just past the Probe::State on entry.
| 244 | "call *" STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%ebp)" "\n" |
| 245 | |
| 246 | // Make sure the Probe::State is entirely below the result stack pointer so |
| 247 | // that register values are still preserved when we call the initializeStack |
| 248 | // function. |
| 249 | "movl $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %ecx" "\n" |
| 250 | "movl %ebp, %eax" "\n" |
| 251 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp), %edx" "\n" |
| 252 | "addl %ecx, %eax" "\n" |
| 253 | "cmpl %eax, %edx" "\n" |
| 254 | "jge " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) "\n" |
| 255 | |
| 256 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
| 257 | "subl %ecx, %edx" "\n" |
| 258 | "andl $~0x1f, %edx" "\n" // Keep the stack pointer 32 bytes aligned. |
| 259 | "xorl %eax, %eax" "\n" |
| 260 | "movl %edx, %esp" "\n" |
| 261 | |
| 262 | "movl $" STRINGIZE_VALUE_OF(PROBE_SIZE) ", %ecx" "\n" |
| 263 | |
| 264 | // Copy the Probe::State to the safe place. |
// eax is the byte index and ecx the byte count (PROBE_SIZE); one pointer-sized word is
// copied per iteration. On this path the destination is at a lower address than the
// source (result esp < old state end, so result esp - size < old state base), so the
// ascending copy is safe even if the two regions overlap.
| 265 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) ":" "\n" |
| 266 | "movl (%ebp, %eax), %edx" "\n" |
| 267 | "movl %edx, (%esp, %eax)" "\n" |
| 268 | "addl $" STRINGIZE_VALUE_OF(PTR_SIZE) ", %eax" "\n" |
| 269 | "cmpl %eax, %ecx" "\n" |
| 270 | "jg " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) "\n" |
| 271 | |
| 272 | "movl %esp, %ebp" "\n" |
| 273 | |
| 274 | // Call initializeStackFunction if present. |
// The xor/add pair loads the function pointer into ecx and sets ZF when it is null.
| 275 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) ":" "\n" |
| 276 | "xorl %ecx, %ecx" "\n" |
| 277 | "addl " STRINGIZE_VALUE_OF(PROBE_INIT_STACK_FUNCTION_OFFSET) "(%ebp), %ecx" "\n" |
| 278 | "je " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) "\n" |
| 279 | |
| 280 | // Reserve stack space for the arg while maintaining the required stack |
| 281 | // pointer 32 byte alignment: |
| 282 | "subl $0x20, %esp" "\n" |
| 283 | "movl %ebp, 0(%esp)" "\n" // the Probe::State* arg. |
| 284 | "call *%ecx" "\n" |
| 285 | |
| 286 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) ":" "\n" |
| 287 | |
| 288 | // To enable probes to modify register state, we copy all registers |
| 289 | // out of the Probe::State before returning. |
| 290 | |
| 291 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%ebp), %edx" "\n" |
| 292 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%ebp), %ebx" "\n" |
| 293 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%ebp), %esi" "\n" |
| 294 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%ebp), %edi" "\n" |
| 295 | |
| 296 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp), %xmm0" "\n" |
| 297 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp), %xmm1" "\n" |
| 298 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp), %xmm2" "\n" |
| 299 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp), %xmm3" "\n" |
| 300 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp), %xmm4" "\n" |
| 301 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp), %xmm5" "\n" |
| 302 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp), %xmm6" "\n" |
| 303 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp), %xmm7" "\n" |
| 304 | |
| 305 | // There are 6 more registers left to restore: |
| 306 | // eax, ecx, ebp, esp, eip, and eflags. |
| 307 | |
| 308 | // The restoration process at the end of this trampoline works by popping |
| 309 | // 5 words off the stack into eflags, eax, ecx, ebp, and eip. These 5 words need |
| 310 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
| 311 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
| 312 | // these 5 words) the restore area. |
| 313 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp), %ecx" "\n" |
| 314 | "subl $5 * " STRINGIZE_VALUE_OF(PTR_SIZE) ", %ecx" "\n" |
| 315 | |
| 316 | // ecx now points to the restore area. |
| 317 | |
| 318 | // Copy remaining restore values from the Probe::State to the restore area. |
| 319 | // Note: We already ensured above that the Probe::State is in a safe location before |
| 320 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
| 321 | // change the stack pointer again. |
| 322 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%ebp), %eax" "\n" |
| 323 | "movl %eax, 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
| 324 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%ebp), %eax" "\n" |
| 325 | "movl %eax, 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
| 326 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%ebp), %eax" "\n" |
| 327 | "movl %eax, 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
| 328 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%ebp), %eax" "\n" |
| 329 | "movl %eax, 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
| 330 | "movl " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%ebp), %eax" "\n" |
| 331 | "movl %eax, 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%ecx)" "\n" |
| 332 | "movl %ecx, %esp" "\n" |
| 333 | |
| 334 | // Do the remaining restoration by popping off the restore area. |
// The final ret pops the last word of the restore area into eip.
| 335 | "popfl" "\n" |
| 336 | "popl %eax" "\n" |
| 337 | "popl %ecx" "\n" |
| 338 | "popl %ebp" "\n" |
| 339 | "ret" "\n" |
| 340 | ); |
| 341 | #endif |
| 342 | |
| 343 | #if COMPILER(MSVC) |
// ctiMasmProbeTrampoline (32-bit x86, MSVC naked function).
// Same sequence as the GCC-compatible variant of this trampoline: capture the CPU state
// into a Probe::State carved out of the stack, call the executor received in ecx with a
// pointer to that state, call the state's initializeStackFunction if one is set, then
// reload every register (including esp, eip and eflags) from the state and return to
// the recorded eip.
| 344 | extern "C" __declspec(naked) void ctiMasmProbeTrampoline() |
| 345 | { |
| 346 | __asm { |
| 347 | pushfd; |
| 348 | |
| 349 | // MacroAssemblerX86Common::probe() has already generated code to store some values. |
| 350 | // Together with the eflags pushed above, the top of stack now looks like |
| 351 | // this: |
| 352 | // esp[0 * ptrSize]: eflags |
| 353 | // esp[1 * ptrSize]: return address / saved eip |
| 354 | // esp[2 * ptrSize]: saved ebx |
| 355 | // esp[3 * ptrSize]: saved edx |
| 356 | // esp[4 * ptrSize]: saved ecx |
| 357 | // esp[5 * ptrSize]: saved eax |
| 358 | // |
| 359 | // Incoming registers contain: |
| 360 | // ecx: Probe::executeProbe |
| 361 | // edx: probe function |
| 362 | // ebx: probe arg |
| 363 | // eax: scratch (was ctiMasmProbeTrampoline) |
| 364 | |
// eax keeps pointing at the incoming record (eflags at offset 0) while esp is moved
// down to make room for the Probe::State plus the outgoing record.
| 365 | mov eax, esp |
| 366 | sub esp, PROBE_SIZE + OUT_SIZE |
| 367 | |
| 368 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes. |
| 369 | and esp, ~0x1f |
| 370 | |
// ebp is saved first because it is about to be repurposed as the Probe::State pointer.
| 371 | mov [PROBE_CPU_EBP_OFFSET + esp], ebp |
| 372 | mov ebp, esp // Save the Probe::State*. |
| 373 | |
| 374 | mov [PROBE_EXECUTOR_OFFSET + ebp], ecx |
| 375 | mov [PROBE_PROBE_FUNCTION_OFFSET + ebp], edx |
| 376 | mov [PROBE_ARG_OFFSET + ebp], ebx |
| 377 | mov [PROBE_CPU_ESI_OFFSET + ebp], esi |
| 378 | mov [PROBE_CPU_EDI_OFFSET + ebp], edi |
| 379 | |
// Copy the six words of the incoming record into their Probe::State slots, using ecx as scratch.
| 380 | mov ecx, [0 * PTR_SIZE + eax] |
| 381 | mov [PROBE_CPU_EFLAGS_OFFSET + ebp], ecx |
| 382 | mov ecx, [1 * PTR_SIZE + eax] |
| 383 | mov [PROBE_CPU_EIP_OFFSET + ebp], ecx |
| 384 | mov ecx, [2 * PTR_SIZE + eax] |
| 385 | mov [PROBE_CPU_EBX_OFFSET + ebp], ecx |
| 386 | mov ecx, [3 * PTR_SIZE + eax] |
| 387 | mov [PROBE_CPU_EDX_OFFSET + ebp], ecx |
| 388 | mov ecx, [4 * PTR_SIZE + eax] |
| 389 | mov [PROBE_CPU_ECX_OFFSET + ebp], ecx |
| 390 | mov ecx, [5 * PTR_SIZE + eax] |
| 391 | mov [PROBE_CPU_EAX_OFFSET + ebp], ecx |
| 392 | |
// The esp recorded in the state is the address just above the six-word incoming record.
| 393 | mov ecx, eax |
| 394 | add ecx, 6 * PTR_SIZE |
| 395 | mov [PROBE_CPU_ESP_OFFSET + ebp], ecx |
| 396 | |
| 397 | movq qword ptr[PROBE_CPU_XMM0_OFFSET + ebp], xmm0 |
| 398 | movq qword ptr[PROBE_CPU_XMM1_OFFSET + ebp], xmm1 |
| 399 | movq qword ptr[PROBE_CPU_XMM2_OFFSET + ebp], xmm2 |
| 400 | movq qword ptr[PROBE_CPU_XMM3_OFFSET + ebp], xmm3 |
| 401 | movq qword ptr[PROBE_CPU_XMM4_OFFSET + ebp], xmm4 |
| 402 | movq qword ptr[PROBE_CPU_XMM5_OFFSET + ebp], xmm5 |
| 403 | movq qword ptr[PROBE_CPU_XMM6_OFFSET + ebp], xmm6 |
| 404 | movq qword ptr[PROBE_CPU_XMM7_OFFSET + ebp], xmm7 |
| 405 | |
| 406 | // Reserve stack space for the arg while maintaining the required stack |
| 407 | // pointer 32 byte alignment: |
| 408 | sub esp, 0x20 |
| 409 | mov [0 + esp], ebp // the Probe::State* arg. |
| 410 | |
// Call the executor that was stashed just past the Probe::State on entry.
| 411 | call [PROBE_EXECUTOR_OFFSET + ebp] |
| 412 | |
| 413 | // Make sure the Probe::State is entirely below the result stack pointer so |
| 414 | // that register values are still preserved when we call the initializeStack |
| 415 | // function. |
| 416 | mov ecx, PROBE_SIZE + OUT_SIZE |
| 417 | mov eax, ebp |
| 418 | mov edx, [PROBE_CPU_ESP_OFFSET + ebp] |
| 419 | add eax, ecx |
| 420 | cmp edx, eax |
| 421 | jge ctiMasmProbeTrampolineProbeContextIsSafe |
| 422 | |
| 423 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
| 424 | sub edx, ecx |
| 425 | and edx, ~0x1f // Keep the stack pointer 32 bytes aligned. |
| 426 | xor eax, eax |
| 427 | mov esp, edx |
| 428 | |
| 429 | mov ecx, PROBE_SIZE |
| 430 | |
| 431 | // Copy the Probe::State to the safe place. |
// eax is the byte index and ecx the byte count (PROBE_SIZE); one pointer-sized word is
// copied per iteration. On this path the destination is at a lower address than the
// source (result esp < old state end, so result esp - size < old state base), so the
// ascending copy is safe even if the two regions overlap.
| 432 | ctiMasmProbeTrampolineCopyLoop : |
| 433 | mov edx, [ebp + eax] |
| 434 | mov [esp + eax], edx |
| 435 | add eax, PTR_SIZE |
| 436 | cmp ecx, eax |
| 437 | jg ctiMasmProbeTrampolineCopyLoop |
| 438 | |
| 439 | mov ebp, esp |
| 440 | |
| 441 | // Call initializeStackFunction if present. |
// The xor/add pair loads the function pointer into ecx and sets ZF when it is null.
| 442 | ctiMasmProbeTrampolineProbeContextIsSafe : |
| 443 | xor ecx, ecx |
| 444 | add ecx, [PROBE_INIT_STACK_FUNCTION_OFFSET + ebp] |
| 445 | je ctiMasmProbeTrampolineRestoreRegisters |
| 446 | |
| 447 | // Reserve stack space for the arg while maintaining the required stack |
| 448 | // pointer 32 byte alignment: |
| 449 | sub esp, 0x20 |
| 450 | mov [0 + esp], ebp // the Probe::State* arg. |
| 451 | call ecx |
| 452 | |
| 453 | ctiMasmProbeTrampolineRestoreRegisters : |
| 454 | |
| 455 | // To enable probes to modify register state, we copy all registers |
| 456 | // out of the Probe::State before returning. |
| 457 | |
| 458 | mov edx, [PROBE_CPU_EDX_OFFSET + ebp] |
| 459 | mov ebx, [PROBE_CPU_EBX_OFFSET + ebp] |
| 460 | mov esi, [PROBE_CPU_ESI_OFFSET + ebp] |
| 461 | mov edi, [PROBE_CPU_EDI_OFFSET + ebp] |
| 462 | |
| 463 | movq xmm0, qword ptr[PROBE_CPU_XMM0_OFFSET + ebp] |
| 464 | movq xmm1, qword ptr[PROBE_CPU_XMM1_OFFSET + ebp] |
| 465 | movq xmm2, qword ptr[PROBE_CPU_XMM2_OFFSET + ebp] |
| 466 | movq xmm3, qword ptr[PROBE_CPU_XMM3_OFFSET + ebp] |
| 467 | movq xmm4, qword ptr[PROBE_CPU_XMM4_OFFSET + ebp] |
| 468 | movq xmm5, qword ptr[PROBE_CPU_XMM5_OFFSET + ebp] |
| 469 | movq xmm6, qword ptr[PROBE_CPU_XMM6_OFFSET + ebp] |
| 470 | movq xmm7, qword ptr[PROBE_CPU_XMM7_OFFSET + ebp] |
| 471 | |
| 472 | // There are 6 more registers left to restore: |
| 473 | // eax, ecx, ebp, esp, eip, and eflags. |
| 474 | |
| 475 | // The restoration process at the end of this trampoline works by popping |
| 476 | // 5 words off the stack into eflags, eax, ecx, ebp, and eip. These 5 words need |
| 477 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
| 478 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
| 479 | // these 5 words) the restore area. |
| 480 | mov ecx, [PROBE_CPU_ESP_OFFSET + ebp] |
| 481 | sub ecx, 5 * PTR_SIZE |
| 482 | |
| 483 | // ecx now points to the restore area. |
| 484 | |
| 485 | // Copy remaining restore values from the Probe::State to the restore area. |
| 486 | // Note: We already ensured above that the Probe::State is in a safe location before |
| 487 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
| 488 | // change the stack pointer again. |
| 489 | mov eax, [PROBE_CPU_EFLAGS_OFFSET + ebp] |
| 490 | mov [0 * PTR_SIZE + ecx], eax |
| 491 | mov eax, [PROBE_CPU_EAX_OFFSET + ebp] |
| 492 | mov [1 * PTR_SIZE + ecx], eax |
| 493 | mov eax, [PROBE_CPU_ECX_OFFSET + ebp] |
| 494 | mov [2 * PTR_SIZE + ecx], eax |
| 495 | mov eax, [PROBE_CPU_EBP_OFFSET + ebp] |
| 496 | mov [3 * PTR_SIZE + ecx], eax |
| 497 | mov eax, [PROBE_CPU_EIP_OFFSET + ebp] |
| 498 | mov [4 * PTR_SIZE + ecx], eax |
| 499 | mov esp, ecx |
| 500 | |
| 501 | // Do the remaining restoration by popping off the restore area. |
// The final ret pops the last word of the restore area into eip.
| 502 | popfd |
| 503 | pop eax |
| 504 | pop ecx |
| 505 | pop ebp |
| 506 | ret |
| 507 | } |
| 508 | } |
| 509 | #endif // COMPILER(MSVC) |
| 510 | |
| 511 | #endif // CPU(X86) |
| 512 | |
| 513 | #if CPU(X86_64) |
| 514 | #if COMPILER(GCC_COMPATIBLE) |
| 515 | asm ( |
| 516 | ".globl " SYMBOL_STRING(ctiMasmProbeTrampoline) "\n" |
| 517 | HIDE_SYMBOL(ctiMasmProbeTrampoline) "\n" |
| 518 | SYMBOL_STRING(ctiMasmProbeTrampoline) ":" "\n" |
| 519 | |
| 520 | "pushfq" "\n" |
| 521 | |
| 522 | // MacroAssemblerX86Common::probe() has already generated code to store some values. |
| 523 | // Together with the rflags pushed above, the top of stack now looks like this: |
| 524 | // rsp[0 * ptrSize]: rflags |
| 525 | // rsp[1 * ptrSize]: return address / saved rip |
| 526 | // rsp[2 * ptrSize]: saved rbx |
| 527 | // rsp[3 * ptrSize]: saved rdx |
| 528 | // rsp[4 * ptrSize]: saved rcx |
| 529 | // rsp[5 * ptrSize]: saved rax |
| 530 | // |
| 531 | // Incoming registers contain: |
| 532 | // rcx: Probe::executeProbe |
| 533 | // rdx: probe function |
| 534 | // rbx: probe arg |
| 535 | // rax: scratch (was ctiMasmProbeTrampoline) |
| 536 | |
| 537 | "movq %rsp, %rax" "\n" |
| 538 | "subq $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %rsp" "\n" |
| 539 | |
| 540 | // The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes. |
| 541 | "andq $~0x1f, %rsp" "\n" |
| 542 | // Since sp points to the Probe::State, we've ensured that it's protected from interrupts before we initialize it. |
| 543 | |
| 544 | "movq %rbp, " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%rsp)" "\n" |
| 545 | "movq %rsp, %rbp" "\n" // Save the Probe::State*. |
| 546 | |
| 547 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rbp)" "\n" |
| 548 | "movq %rdx, " STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%rbp)" "\n" |
| 549 | "movq %rbx, " STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "(%rbp)" "\n" |
| 550 | "movq %rsi, " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%rbp)" "\n" |
| 551 | "movq %rdi, " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%rbp)" "\n" |
| 552 | |
| 553 | "movq 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 554 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%rbp)" "\n" |
| 555 | "movq 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 556 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%rbp)" "\n" |
| 557 | "movq 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 558 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%rbp)" "\n" |
| 559 | "movq 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 560 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%rbp)" "\n" |
| 561 | "movq 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 562 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rbp)" "\n" |
| 563 | "movq 5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rax), %rcx" "\n" |
| 564 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%rbp)" "\n" |
| 565 | |
| 566 | "movq %rax, %rcx" "\n" |
| 567 | "addq $" STRINGIZE_VALUE_OF(6 * PTR_SIZE) ", %rcx" "\n" |
| 568 | "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp)" "\n" |
| 569 | |
| 570 | "movq %r8, " STRINGIZE_VALUE_OF(PROBE_CPU_R8_OFFSET) "(%rbp)" "\n" |
| 571 | "movq %r9, " STRINGIZE_VALUE_OF(PROBE_CPU_R9_OFFSET) "(%rbp)" "\n" |
| 572 | "movq %r10, " STRINGIZE_VALUE_OF(PROBE_CPU_R10_OFFSET) "(%rbp)" "\n" |
| 573 | "movq %r11, " STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET) "(%rbp)" "\n" |
| 574 | "movq %r12, " STRINGIZE_VALUE_OF(PROBE_CPU_R12_OFFSET) "(%rbp)" "\n" |
| 575 | "movq %r13, " STRINGIZE_VALUE_OF(PROBE_CPU_R13_OFFSET) "(%rbp)" "\n" |
| 576 | "movq %r14, " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp)" "\n" |
| 577 | "movq %r15, " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp)" "\n" |
| 578 | |
| 579 | "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp)" "\n" |
| 580 | "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp)" "\n" |
| 581 | "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp)" "\n" |
| 582 | "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp)" "\n" |
| 583 | "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp)" "\n" |
| 584 | "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp)" "\n" |
| 585 | "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp)" "\n" |
| 586 | "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp)" "\n" |
| 587 | "movq %xmm8, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp)" "\n" |
| 588 | "movq %xmm9, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp)" "\n" |
| 589 | "movq %xmm10, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp)" "\n" |
| 590 | "movq %xmm11, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp)" "\n" |
| 591 | "movq %xmm12, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp)" "\n" |
| 592 | "movq %xmm13, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp)" "\n" |
| 593 | "movq %xmm14, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp)" "\n" |
| 594 | "movq %xmm15, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp)" "\n" |
| 595 | |
| 596 | "movq %rbp, %rdi" "\n" // the Probe::State* arg. |
| 597 | "call *" STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rbp)" "\n" |
| 598 | |
| 599 | // Make sure the Probe::State is entirely below the result stack pointer so |
| 600 | // that register values are still preserved when we call the initializeStack |
| 601 | // function. |
| 602 | "movq $" STRINGIZE_VALUE_OF(PROBE_SIZE + OUT_SIZE) ", %rcx" "\n" |
| 603 | "movq %rbp, %rax" "\n" |
| 604 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp), %rdx" "\n" |
| 605 | "addq %rcx, %rax" "\n" |
| 606 | "cmpq %rax, %rdx" "\n" |
| 607 | "jge " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) "\n" |
| 608 | |
| 609 | // Allocate a safe place on the stack below the result stack pointer to stash the Probe::State. |
| 610 | "subq %rcx, %rdx" "\n" |
| 611 | "andq $~0x1f, %rdx" "\n" // Keep the stack pointer 32 bytes aligned. |
| 612 | "xorq %rax, %rax" "\n" |
| 613 | "movq %rdx, %rsp" "\n" |
| 614 | |
| 615 | "movq $" STRINGIZE_VALUE_OF(PROBE_SIZE) ", %rcx" "\n" |
| 616 | |
| 617 | // Copy the Probe::State to the safe place. |
| 618 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) ":" "\n" |
| 619 | "movq (%rbp, %rax), %rdx" "\n" |
| 620 | "movq %rdx, (%rsp, %rax)" "\n" |
| 621 | "addq $" STRINGIZE_VALUE_OF(PTR_SIZE) ", %rax" "\n" |
| 622 | "cmpq %rax, %rcx" "\n" |
| 623 | "jg " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineCopyLoop) "\n" |
| 624 | |
| 625 | "movq %rsp, %rbp" "\n" |
| 626 | |
| 627 | // Call initializeStackFunction if present. |
| 628 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineProbeStateIsSafe) ":" "\n" |
| 629 | "xorq %rcx, %rcx" "\n" |
| 630 | "addq " STRINGIZE_VALUE_OF(PROBE_INIT_STACK_FUNCTION_OFFSET) "(%rbp), %rcx" "\n" |
| 631 | "je " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) "\n" |
| 632 | |
| 633 | "movq %rbp, %rdi" "\n" // the Probe::State* arg. |
| 634 | "call *%rcx" "\n" |
| 635 | |
| 636 | LOCAL_LABEL_STRING(ctiMasmProbeTrampolineRestoreRegisters) ":" "\n" |
| 637 | |
| 638 | // To enable probes to modify register state, we copy all registers |
| 639 | // out of the Probe::State before returning. |
| 640 | |
| 641 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%rbp), %rdx" "\n" |
| 642 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EBX_OFFSET) "(%rbp), %rbx" "\n" |
| 643 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%rbp), %rsi" "\n" |
| 644 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%rbp), %rdi" "\n" |
| 645 | |
| 646 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R8_OFFSET) "(%rbp), %r8" "\n" |
| 647 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R9_OFFSET) "(%rbp), %r9" "\n" |
| 648 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R10_OFFSET) "(%rbp), %r10" "\n" |
| 649 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET) "(%rbp), %r11" "\n" |
| 650 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R12_OFFSET) "(%rbp), %r12" "\n" |
| 651 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R13_OFFSET) "(%rbp), %r13" "\n" |
| 652 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp), %r14" "\n" |
| 653 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp), %r15" "\n" |
| 654 | |
| 655 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp), %xmm0" "\n" |
| 656 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp), %xmm1" "\n" |
| 657 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp), %xmm2" "\n" |
| 658 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp), %xmm3" "\n" |
| 659 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp), %xmm4" "\n" |
| 660 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp), %xmm5" "\n" |
| 661 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp), %xmm6" "\n" |
| 662 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp), %xmm7" "\n" |
| 663 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp), %xmm8" "\n" |
| 664 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp), %xmm9" "\n" |
| 665 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp), %xmm10" "\n" |
| 666 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp), %xmm11" "\n" |
| 667 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp), %xmm12" "\n" |
| 668 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp), %xmm13" "\n" |
| 669 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp), %xmm14" "\n" |
| 670 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp), %xmm15" "\n" |
| 671 | |
| 672 | // There are 6 more registers left to restore: |
| 673 | // rax, rcx, rbp, rsp, rip, and rflags. |
| 674 | |
| 675 | // The restoration process at ctiMasmProbeTrampolineEnd below works by popping |
| 676 | // 5 words off the stack into rflags, rax, rcx, rbp, and rip. These 5 words need |
| 677 | // to be pushed on top of the final esp value so that just by popping the 5 words, |
| 678 | // we'll get the esp that the probe wants to set. Let's call this area (for storing |
| 679 | // these 5 words) the restore area. |
| 680 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%rbp), %rcx" "\n" |
| 681 | "subq $5 * " STRINGIZE_VALUE_OF(PTR_SIZE) ", %rcx" "\n" |
| 682 | |
| 683 | // rcx now points to the restore area. |
| 684 | |
| 685 | // Copy remaining restore values from the Probe::State to the restore area. |
| 686 | // Note: We already ensured above that the Probe::State is in a safe location before |
| 687 | // calling the initializeStackFunction. The initializeStackFunction is not allowed to |
| 688 | // change the stack pointer again. |
| 689 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EFLAGS_OFFSET) "(%rbp), %rax" "\n" |
| 690 | "movq %rax, 0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
| 691 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%rbp), %rax" "\n" |
| 692 | "movq %rax, 1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
| 693 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rbp), %rax" "\n" |
| 694 | "movq %rax, 2 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
| 695 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EBP_OFFSET) "(%rbp), %rax" "\n" |
| 696 | "movq %rax, 3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
| 697 | "movq " STRINGIZE_VALUE_OF(PROBE_CPU_EIP_OFFSET) "(%rbp), %rax" "\n" |
| 698 | "movq %rax, 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rcx)" "\n" |
| 699 | "movq %rcx, %rsp" "\n" |
| 700 | |
| 701 | // Do the remaining restoration by popping off the restore area. |
| 702 | "popfq" "\n" |
| 703 | "popq %rax" "\n" |
| 704 | "popq %rcx" "\n" |
| 705 | "popq %rbp" "\n" |
| 706 | "ret" "\n" |
| 707 | ); |
| 708 | #endif // COMPILER(GCC_COMPATIBLE) |
| 709 | #endif // CPU(X86_64) |
| 710 | |
| 711 | // What code is emitted for the probe? |
| 712 | // ================================== |
| 713 | // We want to keep the size of the emitted probe invocation code as compact as |
| 714 | // possible to minimize the perturbation to the JIT generated code. However, |
| 715 | // we also need to preserve the CPU registers and set up the Probe::State to be |
| 716 | // passed to the user probe function. |
| 717 | // |
| 718 | // Hence, we do only the minimum here to preserve a scratch register (i.e. rax |
| 719 | // in this case) and the stack pointer (i.e. rsp), and pass the probe arguments. |
| 720 | // We'll let the ctiMasmProbeTrampoline handle the rest of the probe invocation |
| 721 | // work i.e. saving the CPUState (and setting up the Probe::State), calling the |
| 722 | // user probe function, and restoring the CPUState before returning to JIT |
| 723 | // generated code. |
| 724 | // |
| 725 | // What registers need to be saved? |
| 726 | // =============================== |
| 727 | // The registers are saved for 2 reasons: |
| 728 | // 1. To preserve their state in the JITted code. This means that all registers |
| 729 | // that are not callee saved need to be saved. We also need to save the |
| 730 | // condition code registers because the probe can be inserted between a test |
| 731 | // and a branch. |
| 732 | // 2. To allow the probe to inspect the values of the registers for debugging |
| 733 | // purposes. This means all registers need to be saved. |
| 734 | // |
| 735 | // In summary, save everything. But for reasons stated above, we should do the |
| 736 | // minimum here and let ctiMasmProbeTrampoline do the heavy lifting to save the |
| 737 | // full set. |
| 738 | // |
| 739 | // What values are in the saved registers? |
| 740 | // ====================================== |
| 741 | // Conceptually, the saved registers should contain values as if the probe |
| 742 | // is not present in the JIT generated code. Hence, they should contain values |
| 743 | // that are expected at the start of the instruction immediately following the |
| 744 | // probe. |
| 745 | // |
| 746 | // Specifically, the saved stack pointer register will point to the stack |
| 747 | // position before we push the Probe::State frame. The saved rip will point to |
| 748 | // the address of the instruction immediately following the probe. |
| 749 | |
| 750 | void MacroAssembler::probe(Probe::Function function, void* arg) |
| 751 | { |
| 752 | push(RegisterID::eax); |
| 753 | move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), RegisterID::eax); |
| 754 | push(RegisterID::ecx); |
| 755 | move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeProbe)), RegisterID::ecx); |
| 756 | push(RegisterID::edx); |
| 757 | move(TrustedImmPtr(reinterpret_cast<void*>(function)), RegisterID::edx); |
| 758 | push(RegisterID::ebx); |
| 759 | move(TrustedImmPtr(arg), RegisterID::ebx); |
| 760 | call(RegisterID::eax, CFunctionPtrTag); |
| 761 | } |
| 762 | #endif // ENABLE(MASM_PROBE) |
| 763 | |
| 764 | MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUID(unsigned level) |
| 765 | { |
| 766 | return getCPUIDEx(level, 0); |
| 767 | } |
| 768 | |
| 769 | MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUIDEx(unsigned level, unsigned count) |
| 770 | { |
| 771 | CPUID result { }; |
| 772 | #if COMPILER(MSVC) |
| 773 | __cpuidex(bitwise_cast<int*>(result.data()), level, count); |
| 774 | #else |
| 775 | __asm__ ( |
| 776 | "cpuid\n" |
| 777 | : "=a" (result[0]), "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) |
| 778 | : "0" (level), "2" (count) |
| 779 | ); |
| 780 | #endif |
| 781 | return result; |
| 782 | } |
| 783 | |
| 784 | void MacroAssemblerX86Common::collectCPUFeatures() |
| 785 | { |
| 786 | static std::once_flag onceKey; |
| 787 | std::call_once(onceKey, [] { |
| 788 | { |
| 789 | CPUID cpuid = getCPUID(0x1); |
| 790 | s_sse4_1CheckState = (cpuid[2] & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 791 | s_sse4_2CheckState = (cpuid[2] & (1 << 20)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 792 | s_popcntCheckState = (cpuid[2] & (1 << 23)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 793 | s_avxCheckState = (cpuid[2] & (1 << 28)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 794 | } |
| 795 | { |
| 796 | CPUID cpuid = getCPUID(0x7); |
| 797 | s_bmi1CheckState = (cpuid[2] & (1 << 3)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 798 | } |
| 799 | { |
| 800 | CPUID cpuid = getCPUID(0x80000001); |
| 801 | s_lzcntCheckState = (cpuid[2] & (1 << 5)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear; |
| 802 | } |
| 803 | }); |
| 804 | } |
| 805 | |
| 806 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked; |
| 807 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_2CheckState = CPUIDCheckState::NotChecked; |
| 808 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked; |
| 809 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked; |
| 810 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked; |
| 811 | MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_popcntCheckState = CPUIDCheckState::NotChecked; |
| 812 | |
| 813 | } // namespace JSC |
| 814 | |
| 815 | #endif // ENABLE(ASSEMBLER) && (CPU(X86) || CPU(X86_64)) |
| 816 | |