1/*
2 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "AirFixPartialRegisterStalls.h"
28
29#if ENABLE(B3_JIT)
30
31#include "AirBasicBlock.h"
32#include "AirCode.h"
33#include "AirInsertionSet.h"
34#include "AirInst.h"
35#include "AirInstInlines.h"
36#include "AirPhaseScope.h"
37#include "MacroAssembler.h"
38#include <wtf/IndexMap.h>
39#include <wtf/IndexSet.h>
40#include <wtf/Vector.h>
41
42namespace JSC { namespace B3 { namespace Air {
43
44namespace {
45
46bool hasPartialXmmRegUpdate(const Inst& inst)
47{
48 switch (inst.kind.opcode) {
49 case ConvertDoubleToFloat:
50 case ConvertFloatToDouble:
51 case ConvertInt32ToDouble:
52 case ConvertInt64ToDouble:
53 case ConvertInt32ToFloat:
54 case ConvertInt64ToFloat:
55 case SqrtDouble:
56 case SqrtFloat:
57 case CeilDouble:
58 case CeilFloat:
59 case FloorDouble:
60 case FloorFloat:
61 return true;
62 default:
63 break;
64 }
65 return false;
66}
67
68bool isDependencyBreaking(const Inst& inst)
69{
70 // "xorps reg, reg" is used by the frontend to remove the dependency on its argument.
71 return inst.kind.opcode == MoveZeroToDouble;
72}
73
74// FIXME: find a good distance per architecture experimentally.
75// LLVM uses a distance of 16 but that comes from Nehalem.
76unsigned char minimumSafeDistance = 16;
77
78struct FPDefDistance {
79 FPDefDistance()
80 {
81 for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i)
82 distance[i] = 255;
83 }
84
85 void reset(FPRReg reg)
86 {
87 unsigned index = MacroAssembler::fpRegisterIndex(reg);
88 distance[index] = 255;
89 }
90
91 void add(FPRReg reg, unsigned registerDistance)
92 {
93 unsigned index = MacroAssembler::fpRegisterIndex(reg);
94 if (registerDistance < distance[index])
95 distance[index] = static_cast<unsigned char>(registerDistance);
96 }
97
98 bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0)
99 {
100 bool changed = false;
101 for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) {
102 unsigned regDistance = precessorDistance.distance[i] + constantOffset;
103 if (regDistance < minimumSafeDistance && regDistance < distance[i]) {
104 distance[i] = regDistance;
105 changed = true;
106 }
107 }
108 return changed;
109 }
110
111 unsigned char distance[MacroAssembler::numberOfFPRegisters()];
112};
113
114void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd)
115{
116 --distanceToBlockEnd;
117
118 if (isDependencyBreaking(inst)) {
119 localDistance.reset(inst.args[0].tmp().fpr());
120 return;
121 }
122
123 inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
124 ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation.");
125
126 if (tmp.isFPR() && Arg::isAnyDef(role))
127 localDistance.add(tmp.fpr(), distanceToBlockEnd);
128 });
129}
130
131}
132
133void fixPartialRegisterStalls(Code& code)
134{
135 if (!isX86())
136 return;
137
138 PhaseScope phaseScope(code, "fixPartialRegisterStalls");
139
140 Vector<BasicBlock*> candidates;
141
142 for (BasicBlock* block : code) {
143 for (const Inst& inst : *block) {
144 if (hasPartialXmmRegUpdate(inst)) {
145 candidates.append(block);
146 break;
147 }
148 }
149 }
150
151 // Fortunately, Partial Stalls are rarely used. Return early if no block
152 // cares about them.
153 if (candidates.isEmpty())
154 return;
155
156 // For each block, this provides the distance to the last instruction setting each register
157 // on block *entry*.
158 IndexMap<BasicBlock*, FPDefDistance> lastDefDistance(code.size());
159
160 // Blocks with dirty distance at head.
161 IndexSet<BasicBlock*> dirty;
162
163 // First, we compute the local distance for each block and push it to the successors.
164 for (BasicBlock* block : code) {
165 FPDefDistance localDistance;
166
167 unsigned distanceToBlockEnd = block->size();
168 for (Inst& inst : *block)
169 updateDistances(inst, localDistance, distanceToBlockEnd);
170
171 for (BasicBlock* successor : block->successorBlocks()) {
172 if (lastDefDistance[successor].updateFromPrecessor(localDistance))
173 dirty.add(successor);
174 }
175 }
176
177 // Now we propagate the minimums accross blocks.
178 bool changed;
179 do {
180 changed = false;
181
182 for (BasicBlock* block : code) {
183 if (!dirty.remove(block))
184 continue;
185
186 // Little shortcut: if the block is big enough, propagating it won't add any information.
187 if (block->size() >= minimumSafeDistance)
188 continue;
189
190 unsigned blockSize = block->size();
191 FPDefDistance& blockDistance = lastDefDistance[block];
192 for (BasicBlock* successor : block->successorBlocks()) {
193 if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
194 dirty.add(successor);
195 changed = true;
196 }
197 }
198 }
199 } while (changed);
200
201 // Finally, update each block as needed.
202 InsertionSet insertionSet(code);
203 for (BasicBlock* block : candidates) {
204 unsigned distanceToBlockEnd = block->size();
205 FPDefDistance& localDistance = lastDefDistance[block];
206
207 for (unsigned i = 0; i < block->size(); ++i) {
208 Inst& inst = block->at(i);
209
210 if (hasPartialXmmRegUpdate(inst)) {
211 RegisterSet defs;
212 RegisterSet uses;
213 inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
214 if (tmp.isFPR()) {
215 if (Arg::isAnyDef(role))
216 defs.set(tmp.fpr());
217 if (Arg::isAnyUse(role))
218 uses.set(tmp.fpr());
219 }
220 });
221 // We only care about values we define but not use. Otherwise we have to wait
222 // for the value to be resolved anyway.
223 defs.exclude(uses);
224
225 defs.forEach([&] (Reg reg) {
226 if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
227 insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
228 });
229 }
230
231 updateDistances(inst, localDistance, distanceToBlockEnd);
232 }
233 insertionSet.execute(block);
234 }
235}
236
237} } } // namespace JSC::B3::Air
238
239#endif // ENABLE(B3_JIT)
240