Skip to content

[SPARC] Use op-then-halve instructions when we have VIS3 #135718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3562,8 +3562,14 @@ bool SparcTargetLowering::isFNegFree(EVT VT) const {

// Reports whether the FP immediate Imm of type VT is accepted as legal by the
// VIS / VIS3 checks below; every other immediate yields false.
// ForCodeSize is not consulted by this body.
//
// NOTE(review): this listing comes from a rendered diff without +/- markers.
// The first `return` statement (the two lines right after the signature)
// looks like the pre-change body, and the statements after it like the
// post-change body; read literally, that first return makes everything after
// it unreachable. Confirm against the repository before relying on either.
bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
Imm.isZero();
// Only f32 and f64 immediates are ever considered.
if (VT != MVT::f32 && VT != MVT::f64)
return false;
// With VIS, a zero immediate is legal.
if (Subtarget->isVIS() && Imm.isZero())
return true;
// With VIS3, +0.5 and -0.5 are legal as well.
// NOTE(review): getExactLog2Abs() == -1 presumably also means |Imm| == 0.5,
// which would make it redundant with the two isExactlyValue checks -- confirm.
if (Subtarget->isVIS3())
return Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5) ||
Imm.getExactLog2Abs() == -1;
return false;
}

/// Reports CTLZ as fast exactly when the subtarget reports VIS3 support.
bool SparcTargetLowering::isCtlzFast() const {
  const bool HasVIS3 = Subtarget->isVIS3();
  return HasVIS3;
}
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrVIS.td
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
// FP immediate patterns.
// Each leaf accepts a single floating-point constant, tested with
// Imm.isExactlyValue. The leaves are declared with fAny; the patterns that
// use them wrap them in an explicit f32 or f64 type.
// Positive zero.
def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
// Negative zero.
def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
// Positive one half.
def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
// Negative one half.
def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;

// VIS instruction patterns.
let Predicates = [HasVIS] in {
Expand All @@ -293,6 +295,16 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;

// VIS3 instruction patterns.
let Predicates = [HasVIS3] in {
// +/-0.5 immediate.
// This is needed to enable the halving instructions.
// FIXME: Generalize this to arbitrary immediates.
// A SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
// faster than constant pool loading.
//
// The bit pattern is built in an integer register and then moved into an FP
// register with MOVWTOS (f32) or MOVXTOD (f64).
// NOTE(review): presumably SETHIi places its operand in bits 31:10, so
// 0x0FC000 -> 0x3F000000 (+0.5f) and 0x2FC000 -> 0xBF000000 (-0.5f); for f64
// the additional shift by 32 would give 0x3FE0000000000000 (+0.5) and
// 0xBFE0000000000000 (-0.5). Confirm against the SPARC manual.
def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;

def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;

def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
Expand Down Expand Up @@ -329,4 +341,12 @@ def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;

// Op-then-halve FP operations.
// Each pattern selects one instruction for an add or subtract whose result is
// multiplied by the +0.5 / -0.5 immediate leaf:
//   (rs1 + rs2) * +0.5  ->  FHADDS  / FHADDD
//   (rs1 - rs2) * +0.5  ->  FHSUBS  / FHSUBD
//   (rs1 + rs2) * -0.5  ->  FNHADDS / FNHADDD
// No pattern for (rs1 - rs2) * -0.5 appears in this group.
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
} // Predicates = [HasVIS3]
173 changes: 173 additions & 0 deletions llvm/test/CodeGen/SPARC/float-constants.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=sparc | FileCheck %s
; RUN: llc < %s -mtriple=sparcel | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis | FileCheck %s --check-prefix=CHECK-VIS
; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis,+vis3 | FileCheck %s --check-prefix=CHECK-VIS3

;; Bitcast should not do a runtime conversion, but rather emit a
;; constant into integer registers directly.
Expand All @@ -24,6 +25,12 @@ define <2 x i32> @bitcast() nounwind {
; CHECK-VIS-NEXT: sethi 1049856, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: mov %g0, %o1
;
; CHECK-VIS3-LABEL: bitcast:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: sethi 1049856, %o0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: mov %g0, %o1
%1 = bitcast double 5.0 to <2 x i32>
ret <2 x i32> %1
}
Expand Down Expand Up @@ -61,6 +68,17 @@ define void @test_call() nounwind {
; CHECK-VIS-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
; CHECK-VIS-NEXT: ret
; CHECK-VIS-NEXT: restore
;
; CHECK-VIS3-LABEL: test_call:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: save %sp, -176, %sp
; CHECK-VIS3-NEXT: sethi %h44(.LCPI1_0), %i0
; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI1_0), %i0
; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
; CHECK-VIS3-NEXT: call a
; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
; CHECK-VIS3-NEXT: ret
; CHECK-VIS3-NEXT: restore
call void @a(double 5.0)
ret void
}
Expand Down Expand Up @@ -106,6 +124,19 @@ define double @test_intrins_call() nounwind {
; CHECK-VIS-NEXT: nop
; CHECK-VIS-NEXT: ret
; CHECK-VIS-NEXT: restore
;
; CHECK-VIS3-LABEL: test_intrins_call:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: save %sp, -176, %sp
; CHECK-VIS3-NEXT: sethi %h44(.LCPI2_0), %i0
; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI2_0), %i0
; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI2_0)], %f0
; CHECK-VIS3-NEXT: fmovd %f0, %f2
; CHECK-VIS3-NEXT: call pow
; CHECK-VIS3-NEXT: nop
; CHECK-VIS3-NEXT: ret
; CHECK-VIS3-NEXT: restore
%1 = call double @llvm.pow.f64(double 2.0, double 2.0)
ret double %1
}
Expand All @@ -129,6 +160,11 @@ define double @pos_zero_double() nounwind {
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fzero %f0
;
; CHECK-VIS3-LABEL: pos_zero_double:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: fzero %f0
ret double +0.0
}

Expand All @@ -150,6 +186,12 @@ define double @neg_zero_double() nounwind {
; CHECK-VIS-NEXT: fzero %f0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fnegd %f0, %f0
;
; CHECK-VIS3-LABEL: neg_zero_double:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: fzero %f0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: fnegd %f0, %f0
ret double -0.0
}

Expand All @@ -170,6 +212,11 @@ define float @pos_zero_float() nounwind {
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fzeros %f0
;
; CHECK-VIS3-LABEL: pos_zero_float:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: fzeros %f0
ret float +0.0
}

Expand All @@ -191,5 +238,131 @@ define float @neg_zero_float() nounwind {
; CHECK-VIS-NEXT: fzeros %f0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fnegs %f0, %f0
;
; CHECK-VIS3-LABEL: neg_zero_float:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: fzeros %f0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: fnegs %f0, %f0
ret float -0.0
}

;; When we have VIS3, the f32/f64 +/-0.5 constants should be materialized with sethi instead of a constant-pool load.

;; Returns +0.5 as f64. The non-VIS3 prefixes expect a load from .LCPI7_0;
;; CHECK-VIS3 expects sethi/sllx/movxtod. 1046528 == 0x0FF800, the SETHIi
;; operand of the f64 +0.5 pattern in SparcInstrVIS.td.
define double @pos_half_double() nounwind {
; CHECK-LABEL: pos_half_double:
; CHECK: ! %bb.0:
; CHECK-NEXT: sethi %hi(.LCPI7_0), %o0
; CHECK-NEXT: retl
; CHECK-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
;
; CHECK-LE-LABEL: pos_half_double:
; CHECK-LE: ! %bb.0:
; CHECK-LE-NEXT: sethi %hi(.LCPI7_0), %o0
; CHECK-LE-NEXT: retl
; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
;
; CHECK-VIS-LABEL: pos_half_double:
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: sethi %h44(.LCPI7_0), %o0
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI7_0), %o0
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI7_0)], %f0
;
; CHECK-VIS3-LABEL: pos_half_double:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: sethi 1046528, %o0
; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: movxtod %o0, %f0
ret double +0.5
}

;; Returns -0.5 as f64. The non-VIS3 prefixes expect a load from .LCPI8_0;
;; CHECK-VIS3 expects sethi/sllx/movxtod. 3143680 == 0x2FF800, the SETHIi
;; operand of the f64 -0.5 pattern in SparcInstrVIS.td.
define double @neg_half_double() nounwind {
; CHECK-LABEL: neg_half_double:
; CHECK: ! %bb.0:
; CHECK-NEXT: sethi %hi(.LCPI8_0), %o0
; CHECK-NEXT: retl
; CHECK-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
;
; CHECK-LE-LABEL: neg_half_double:
; CHECK-LE: ! %bb.0:
; CHECK-LE-NEXT: sethi %hi(.LCPI8_0), %o0
; CHECK-LE-NEXT: retl
; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
;
; CHECK-VIS-LABEL: neg_half_double:
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: sethi %h44(.LCPI8_0), %o0
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI8_0), %o0
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI8_0)], %f0
;
; CHECK-VIS3-LABEL: neg_half_double:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: sethi 3143680, %o0
; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: movxtod %o0, %f0
ret double -0.5
}

;; Returns +0.5 as f32. The non-VIS3 prefixes expect a load from .LCPI9_0;
;; CHECK-VIS3 expects sethi/movwtos. 1032192 == 0x0FC000, the SETHIi operand
;; of the f32 +0.5 pattern in SparcInstrVIS.td.
define float @pos_half_float() nounwind {
; CHECK-LABEL: pos_half_float:
; CHECK: ! %bb.0:
; CHECK-NEXT: sethi %hi(.LCPI9_0), %o0
; CHECK-NEXT: retl
; CHECK-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
;
; CHECK-LE-LABEL: pos_half_float:
; CHECK-LE: ! %bb.0:
; CHECK-LE-NEXT: sethi %hi(.LCPI9_0), %o0
; CHECK-LE-NEXT: retl
; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
;
; CHECK-VIS-LABEL: pos_half_float:
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: sethi %h44(.LCPI9_0), %o0
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI9_0), %o0
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI9_0)], %f0
;
; CHECK-VIS3-LABEL: pos_half_float:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: sethi 1032192, %o0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: movwtos %o0, %f0
ret float +0.5
}

;; Returns -0.5 as f32. The non-VIS3 prefixes expect a load from .LCPI10_0;
;; CHECK-VIS3 expects sethi/movwtos. 3129344 == 0x2FC000, the SETHIi operand
;; of the f32 -0.5 pattern in SparcInstrVIS.td.
define float @neg_half_float() nounwind {
; CHECK-LABEL: neg_half_float:
; CHECK: ! %bb.0:
; CHECK-NEXT: sethi %hi(.LCPI10_0), %o0
; CHECK-NEXT: retl
; CHECK-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
;
; CHECK-LE-LABEL: neg_half_float:
; CHECK-LE: ! %bb.0:
; CHECK-LE-NEXT: sethi %hi(.LCPI10_0), %o0
; CHECK-LE-NEXT: retl
; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
;
; CHECK-VIS-LABEL: neg_half_float:
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: sethi %h44(.LCPI10_0), %o0
; CHECK-VIS-NEXT: add %o0, %m44(.LCPI10_0), %o0
; CHECK-VIS-NEXT: sllx %o0, 12, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI10_0)], %f0
;
; CHECK-VIS3-LABEL: neg_half_float:
; CHECK-VIS3: ! %bb.0:
; CHECK-VIS3-NEXT: sethi 3129344, %o0
; CHECK-VIS3-NEXT: retl
; CHECK-VIS3-NEXT: movwtos %o0, %f0
ret float -0.5
}
Loading
Loading