1 | /* |
2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "AirFixPartialRegisterStalls.h" |
28 | |
29 | #if ENABLE(B3_JIT) |
30 | |
31 | #include "AirBasicBlock.h" |
32 | #include "AirCode.h" |
33 | #include "AirInsertionSet.h" |
34 | #include "AirInst.h" |
35 | #include "AirInstInlines.h" |
36 | #include "AirPhaseScope.h" |
37 | #include "MacroAssembler.h" |
38 | #include <wtf/IndexMap.h> |
39 | #include <wtf/IndexSet.h> |
40 | #include <wtf/Vector.h> |
41 | |
42 | namespace JSC { namespace B3 { namespace Air { |
43 | |
44 | namespace { |
45 | |
46 | bool hasPartialXmmRegUpdate(const Inst& inst) |
47 | { |
48 | switch (inst.kind.opcode) { |
49 | case ConvertDoubleToFloat: |
50 | case ConvertFloatToDouble: |
51 | case ConvertInt32ToDouble: |
52 | case ConvertInt64ToDouble: |
53 | case ConvertInt32ToFloat: |
54 | case ConvertInt64ToFloat: |
55 | case SqrtDouble: |
56 | case SqrtFloat: |
57 | case CeilDouble: |
58 | case CeilFloat: |
59 | case FloorDouble: |
60 | case FloorFloat: |
61 | return true; |
62 | default: |
63 | break; |
64 | } |
65 | return false; |
66 | } |
67 | |
68 | bool isDependencyBreaking(const Inst& inst) |
69 | { |
70 | // "xorps reg, reg" is used by the frontend to remove the dependency on its argument. |
71 | return inst.kind.opcode == MoveZeroToDouble; |
72 | } |
73 | |
// FIXME: find a good distance per architecture experimentally.
// LLVM uses a distance of 16 but that comes from Nehalem.
//
// A def that happened at least this many instructions ago is considered
// resolved by the pipeline, so no dependency-breaking instruction is needed.
// This is a compile-time constant that is only ever read, so it is constexpr.
constexpr unsigned char minimumSafeDistance = 16;
77 | |
78 | struct FPDefDistance { |
79 | FPDefDistance() |
80 | { |
81 | for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) |
82 | distance[i] = 255; |
83 | } |
84 | |
85 | void reset(FPRReg reg) |
86 | { |
87 | unsigned index = MacroAssembler::fpRegisterIndex(reg); |
88 | distance[index] = 255; |
89 | } |
90 | |
91 | void add(FPRReg reg, unsigned registerDistance) |
92 | { |
93 | unsigned index = MacroAssembler::fpRegisterIndex(reg); |
94 | if (registerDistance < distance[index]) |
95 | distance[index] = static_cast<unsigned char>(registerDistance); |
96 | } |
97 | |
98 | bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0) |
99 | { |
100 | bool changed = false; |
101 | for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) { |
102 | unsigned regDistance = precessorDistance.distance[i] + constantOffset; |
103 | if (regDistance < minimumSafeDistance && regDistance < distance[i]) { |
104 | distance[i] = regDistance; |
105 | changed = true; |
106 | } |
107 | } |
108 | return changed; |
109 | } |
110 | |
111 | unsigned char distance[MacroAssembler::numberOfFPRegisters()]; |
112 | }; |
113 | |
114 | void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd) |
115 | { |
116 | --distanceToBlockEnd; |
117 | |
118 | if (isDependencyBreaking(inst)) { |
119 | localDistance.reset(inst.args[0].tmp().fpr()); |
120 | return; |
121 | } |
122 | |
123 | inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) { |
124 | ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation." ); |
125 | |
126 | if (tmp.isFPR() && Arg::isAnyDef(role)) |
127 | localDistance.add(tmp.fpr(), distanceToBlockEnd); |
128 | }); |
129 | } |
130 | |
131 | } |
132 | |
// Inserts dependency-breaking instructions (MoveZeroToDouble, i.e. "xorps
// reg, reg") in front of instructions that only partially update an XMM
// register, whenever that register may have been defined recently. This
// avoids partial-register stalls on x86. Must run after register allocation
// (see the assertion in updateDistances).
void fixPartialRegisterStalls(Code& code)
{
    // Partial XMM updates are an x86/SSE phenomenon; other targets need nothing.
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls" );

    // Blocks containing at least one partially-updating instruction; only
    // these can ever need an insertion.
    Vector<BasicBlock*> candidates;

    for (BasicBlock* block : code) {
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                candidates.append(block);
                break;
            }
        }
    }

    // Fortunately, Partial Stalls are rarely used. Return early if no block
    // cares about them.
    if (candidates.isEmpty())
        return;

    // For each block, this provides the distance to the last instruction setting each register
    // on block *entry*.
    IndexMap<BasicBlock*, FPDefDistance> lastDefDistance(code.size());

    // Blocks with dirty distance at head.
    IndexSet<BasicBlock*> dirty;

    // First, we compute the local distance for each block and push it to the successors.
    for (BasicBlock* block : code) {
        FPDefDistance localDistance;

        // distanceToBlockEnd counts down as we walk, so each recorded def is
        // tagged with its distance to the end of this block.
        unsigned distanceToBlockEnd = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, localDistance, distanceToBlockEnd);

        for (BasicBlock* successor : block->successorBlocks()) {
            if (lastDefDistance[successor].updateFromPrecessor(localDistance))
                dirty.add(successor);
        }
    }

    // Now we propagate the minimums across blocks until we reach a fixed point.
    bool changed;
    do {
        changed = false;

        for (BasicBlock* block : code) {
            if (!dirty.remove(block))
                continue;

            // Little shortcut: if the block is big enough, propagating it won't add any information.
            if (block->size() >= minimumSafeDistance)
                continue;

            // Entry distances grow by the block's length before reaching its
            // successors, hence the blockSize offset.
            unsigned blockSize = block->size();
            FPDefDistance& blockDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
                    dirty.add(successor);
                    changed = true;
                }
            }
        }
    } while (changed);

    // Finally, update each block as needed.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned distanceToBlockEnd = block->size();
        // Note: this starts from the block-entry state and is mutated in place
        // as we walk. Entry distances are not advanced per instruction here,
        // which appears conservative (it can only under-estimate how long ago
        // a def happened, causing extra, harmless insertions) — TODO confirm.
        FPDefDistance& localDistance = lastDefDistance[block];

        for (unsigned i = 0; i < block->size(); ++i) {
            Inst& inst = block->at(i);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSet defs;
                RegisterSet uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                    if (tmp.isFPR()) {
                        if (Arg::isAnyDef(role))
                            defs.set(tmp.fpr());
                        if (Arg::isAnyUse(role))
                            uses.set(tmp.fpr());
                    }
                });
                // We only care about values we define but not use. Otherwise we have to wait
                // for the value to be resolved anyway.
                defs.exclude(uses);

                // For every def-only FP register with a recent enough prior
                // def, break the dependency with a MoveZeroToDouble (xorps)
                // inserted just before this instruction.
                defs.forEach([&] (Reg reg) {
                    if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
                        insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            updateDistances(inst, localDistance, distanceToBlockEnd);
        }
        insertionSet.execute(block);
    }
}
236 | |
237 | } } } // namespace JSC::B3::Air |
238 | |
239 | #endif // ENABLE(B3_JIT) |
240 | |