Skip to content

Commit 3f15b66

Browse files
authored
[SPARC] Use op-then-halve instructions when we have VIS3
Reviewers: brad0, s-barannikov, arsenm, rorth. Reviewed By: arsenm. Pull Request: #135718
1 parent 52c2e45 commit 3f15b66

File tree

4 files changed

+352
-2
lines changed

4 files changed

+352
-2
lines changed

llvm/lib/Target/Sparc/SparcISelLowering.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3562,8 +3562,14 @@ bool SparcTargetLowering::isFNegFree(EVT VT) const {
35623562

35633563
bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
35643564
bool ForCodeSize) const {
3565-
return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
3566-
Imm.isZero();
3565+
if (VT != MVT::f32 && VT != MVT::f64)
3566+
return false;
3567+
if (Subtarget->isVIS() && Imm.isZero())
3568+
return true;
3569+
if (Subtarget->isVIS3())
3570+
return Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5) ||
3571+
Imm.getExactLog2Abs() == -1;
3572+
return false;
35673573
}
35683574

35693575
bool SparcTargetLowering::isCtlzFast() const { return Subtarget->isVIS3(); }

llvm/lib/Target/Sparc/SparcInstrVIS.td

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,8 @@ def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
281281
// FP immediate patterns.
282282
def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
283283
def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
284+
def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
285+
def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
284286

285287
// VIS instruction patterns.
286288
let Predicates = [HasVIS] in {
@@ -293,6 +295,16 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
293295

294296
// VIS3 instruction patterns.
295297
let Predicates = [HasVIS3] in {
298+
// +/-0.5 immediate.
299+
// This is needed so that the op-then-halve instruction patterns can match.
300+
// FIXME: Generalize this to arbitrary immediates.
301+
// A SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
302+
// faster than loading them from the constant pool.
303+
def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
304+
def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
305+
def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
306+
def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
307+
296308
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
297309

298310
def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
@@ -329,4 +341,12 @@ def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
329341
def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
330342
def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
331343
def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
344+
345+
// Op-then-halve FP operations.
346+
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
347+
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
348+
def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
349+
def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
350+
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
351+
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
332352
} // Predicates = [HasVIS3]

llvm/test/CodeGen/SPARC/float-constants.ll

Lines changed: 173 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
; RUN: llc < %s -mtriple=sparc | FileCheck %s
33
; RUN: llc < %s -mtriple=sparcel | FileCheck %s --check-prefix=CHECK-LE
44
; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis | FileCheck %s --check-prefix=CHECK-VIS
5+
; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis,+vis3 | FileCheck %s --check-prefix=CHECK-VIS3
56

67
;; Bitcast should not do a runtime conversion, but rather emit a
78
;; constant into integer registers directly.
@@ -24,6 +25,12 @@ define <2 x i32> @bitcast() nounwind {
2425
; CHECK-VIS-NEXT: sethi 1049856, %o0
2526
; CHECK-VIS-NEXT: retl
2627
; CHECK-VIS-NEXT: mov %g0, %o1
28+
;
29+
; CHECK-VIS3-LABEL: bitcast:
30+
; CHECK-VIS3: ! %bb.0:
31+
; CHECK-VIS3-NEXT: sethi 1049856, %o0
32+
; CHECK-VIS3-NEXT: retl
33+
; CHECK-VIS3-NEXT: mov %g0, %o1
2734
%1 = bitcast double 5.0 to <2 x i32>
2835
ret <2 x i32> %1
2936
}
@@ -61,6 +68,17 @@ define void @test_call() nounwind {
6168
; CHECK-VIS-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
6269
; CHECK-VIS-NEXT: ret
6370
; CHECK-VIS-NEXT: restore
71+
;
72+
; CHECK-VIS3-LABEL: test_call:
73+
; CHECK-VIS3: ! %bb.0:
74+
; CHECK-VIS3-NEXT: save %sp, -176, %sp
75+
; CHECK-VIS3-NEXT: sethi %h44(.LCPI1_0), %i0
76+
; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI1_0), %i0
77+
; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
78+
; CHECK-VIS3-NEXT: call a
79+
; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
80+
; CHECK-VIS3-NEXT: ret
81+
; CHECK-VIS3-NEXT: restore
6482
call void @a(double 5.0)
6583
ret void
6684
}
@@ -106,6 +124,19 @@ define double @test_intrins_call() nounwind {
106124
; CHECK-VIS-NEXT: nop
107125
; CHECK-VIS-NEXT: ret
108126
; CHECK-VIS-NEXT: restore
127+
;
128+
; CHECK-VIS3-LABEL: test_intrins_call:
129+
; CHECK-VIS3: ! %bb.0:
130+
; CHECK-VIS3-NEXT: save %sp, -176, %sp
131+
; CHECK-VIS3-NEXT: sethi %h44(.LCPI2_0), %i0
132+
; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI2_0), %i0
133+
; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
134+
; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI2_0)], %f0
135+
; CHECK-VIS3-NEXT: fmovd %f0, %f2
136+
; CHECK-VIS3-NEXT: call pow
137+
; CHECK-VIS3-NEXT: nop
138+
; CHECK-VIS3-NEXT: ret
139+
; CHECK-VIS3-NEXT: restore
109140
%1 = call double @llvm.pow.f64(double 2.0, double 2.0)
110141
ret double %1
111142
}
@@ -129,6 +160,11 @@ define double @pos_zero_double() nounwind {
129160
; CHECK-VIS: ! %bb.0:
130161
; CHECK-VIS-NEXT: retl
131162
; CHECK-VIS-NEXT: fzero %f0
163+
;
164+
; CHECK-VIS3-LABEL: pos_zero_double:
165+
; CHECK-VIS3: ! %bb.0:
166+
; CHECK-VIS3-NEXT: retl
167+
; CHECK-VIS3-NEXT: fzero %f0
132168
ret double +0.0
133169
}
134170

@@ -150,6 +186,12 @@ define double @neg_zero_double() nounwind {
150186
; CHECK-VIS-NEXT: fzero %f0
151187
; CHECK-VIS-NEXT: retl
152188
; CHECK-VIS-NEXT: fnegd %f0, %f0
189+
;
190+
; CHECK-VIS3-LABEL: neg_zero_double:
191+
; CHECK-VIS3: ! %bb.0:
192+
; CHECK-VIS3-NEXT: fzero %f0
193+
; CHECK-VIS3-NEXT: retl
194+
; CHECK-VIS3-NEXT: fnegd %f0, %f0
153195
ret double -0.0
154196
}
155197

@@ -170,6 +212,11 @@ define float @pos_zero_float() nounwind {
170212
; CHECK-VIS: ! %bb.0:
171213
; CHECK-VIS-NEXT: retl
172214
; CHECK-VIS-NEXT: fzeros %f0
215+
;
216+
; CHECK-VIS3-LABEL: pos_zero_float:
217+
; CHECK-VIS3: ! %bb.0:
218+
; CHECK-VIS3-NEXT: retl
219+
; CHECK-VIS3-NEXT: fzeros %f0
173220
ret float +0.0
174221
}
175222

@@ -191,5 +238,131 @@ define float @neg_zero_float() nounwind {
191238
; CHECK-VIS-NEXT: fzeros %f0
192239
; CHECK-VIS-NEXT: retl
193240
; CHECK-VIS-NEXT: fnegs %f0, %f0
241+
;
242+
; CHECK-VIS3-LABEL: neg_zero_float:
243+
; CHECK-VIS3: ! %bb.0:
244+
; CHECK-VIS3-NEXT: fzeros %f0
245+
; CHECK-VIS3-NEXT: retl
246+
; CHECK-VIS3-NEXT: fnegs %f0, %f0
194247
ret float -0.0
195248
}
249+
250+
;; When we have VIS3, f32/f64 +/-0.5 constants should be materialized with sethi instead of being loaded from the constant pool.
251+
252+
define double @pos_half_double() nounwind {
253+
; CHECK-LABEL: pos_half_double:
254+
; CHECK: ! %bb.0:
255+
; CHECK-NEXT: sethi %hi(.LCPI7_0), %o0
256+
; CHECK-NEXT: retl
257+
; CHECK-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
258+
;
259+
; CHECK-LE-LABEL: pos_half_double:
260+
; CHECK-LE: ! %bb.0:
261+
; CHECK-LE-NEXT: sethi %hi(.LCPI7_0), %o0
262+
; CHECK-LE-NEXT: retl
263+
; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
264+
;
265+
; CHECK-VIS-LABEL: pos_half_double:
266+
; CHECK-VIS: ! %bb.0:
267+
; CHECK-VIS-NEXT: sethi %h44(.LCPI7_0), %o0
268+
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI7_0), %o0
269+
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
270+
; CHECK-VIS-NEXT: retl
271+
; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI7_0)], %f0
272+
;
273+
; CHECK-VIS3-LABEL: pos_half_double:
274+
; CHECK-VIS3: ! %bb.0:
275+
; CHECK-VIS3-NEXT: sethi 1046528, %o0
276+
; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
277+
; CHECK-VIS3-NEXT: retl
278+
; CHECK-VIS3-NEXT: movxtod %o0, %f0
279+
ret double +0.5
280+
}
281+
282+
define double @neg_half_double() nounwind {
283+
; CHECK-LABEL: neg_half_double:
284+
; CHECK: ! %bb.0:
285+
; CHECK-NEXT: sethi %hi(.LCPI8_0), %o0
286+
; CHECK-NEXT: retl
287+
; CHECK-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
288+
;
289+
; CHECK-LE-LABEL: neg_half_double:
290+
; CHECK-LE: ! %bb.0:
291+
; CHECK-LE-NEXT: sethi %hi(.LCPI8_0), %o0
292+
; CHECK-LE-NEXT: retl
293+
; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
294+
;
295+
; CHECK-VIS-LABEL: neg_half_double:
296+
; CHECK-VIS: ! %bb.0:
297+
; CHECK-VIS-NEXT: sethi %h44(.LCPI8_0), %o0
298+
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI8_0), %o0
299+
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
300+
; CHECK-VIS-NEXT: retl
301+
; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI8_0)], %f0
302+
;
303+
; CHECK-VIS3-LABEL: neg_half_double:
304+
; CHECK-VIS3: ! %bb.0:
305+
; CHECK-VIS3-NEXT: sethi 3143680, %o0
306+
; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
307+
; CHECK-VIS3-NEXT: retl
308+
; CHECK-VIS3-NEXT: movxtod %o0, %f0
309+
ret double -0.5
310+
}
311+
312+
define float @pos_half_float() nounwind {
313+
; CHECK-LABEL: pos_half_float:
314+
; CHECK: ! %bb.0:
315+
; CHECK-NEXT: sethi %hi(.LCPI9_0), %o0
316+
; CHECK-NEXT: retl
317+
; CHECK-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
318+
;
319+
; CHECK-LE-LABEL: pos_half_float:
320+
; CHECK-LE: ! %bb.0:
321+
; CHECK-LE-NEXT: sethi %hi(.LCPI9_0), %o0
322+
; CHECK-LE-NEXT: retl
323+
; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
324+
;
325+
; CHECK-VIS-LABEL: pos_half_float:
326+
; CHECK-VIS: ! %bb.0:
327+
; CHECK-VIS-NEXT: sethi %h44(.LCPI9_0), %o0
328+
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI9_0), %o0
329+
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
330+
; CHECK-VIS-NEXT: retl
331+
; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI9_0)], %f0
332+
;
333+
; CHECK-VIS3-LABEL: pos_half_float:
334+
; CHECK-VIS3: ! %bb.0:
335+
; CHECK-VIS3-NEXT: sethi 1032192, %o0
336+
; CHECK-VIS3-NEXT: retl
337+
; CHECK-VIS3-NEXT: movwtos %o0, %f0
338+
ret float +0.5
339+
}
340+
341+
define float @neg_half_float() nounwind {
342+
; CHECK-LABEL: neg_half_float:
343+
; CHECK: ! %bb.0:
344+
; CHECK-NEXT: sethi %hi(.LCPI10_0), %o0
345+
; CHECK-NEXT: retl
346+
; CHECK-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
347+
;
348+
; CHECK-LE-LABEL: neg_half_float:
349+
; CHECK-LE: ! %bb.0:
350+
; CHECK-LE-NEXT: sethi %hi(.LCPI10_0), %o0
351+
; CHECK-LE-NEXT: retl
352+
; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
353+
;
354+
; CHECK-VIS-LABEL: neg_half_float:
355+
; CHECK-VIS: ! %bb.0:
356+
; CHECK-VIS-NEXT: sethi %h44(.LCPI10_0), %o0
357+
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI10_0), %o0
358+
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
359+
; CHECK-VIS-NEXT: retl
360+
; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI10_0)], %f0
361+
;
362+
; CHECK-VIS3-LABEL: neg_half_float:
363+
; CHECK-VIS3: ! %bb.0:
364+
; CHECK-VIS3-NEXT: sethi 3129344, %o0
365+
; CHECK-VIS3-NEXT: retl
366+
; CHECK-VIS3-NEXT: movwtos %o0, %f0
367+
ret float -0.5
368+
}

0 commit comments

Comments
 (0)