diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e3270471981cc..6faa02ec17aac 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); @@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FTAN, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); @@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FTAN, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); @@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FTAN, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); @@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FTAN, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); setOperationAction(ISD::FPOW, MVT::f16, Promote); diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index e14e598086249..b6ebeaae5eb6d 100644 --- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -56,6 +56,37 @@ L.entry: declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly +define void @test_tan(ptr %X) nounwind { + +; CHECK-LABEL: test_tan: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK: movt [[reg0]], :upper16:{{.*}} +; CHECK: vld1.64 + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: vst1.64 + +L.entry: + %0 = load <4 x float>, ptr @A, align 16 + %1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0) + store <4 x float> %1, ptr %X, align 16 + ret void +} + +declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly + define void @test_exp(ptr %X) nounwind { ; CHECK-LABEL: test_exp: diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 7381d517505e8..2656cdbb0347e 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -281,6 +281,23 @@ define void @test_cos(ptr %p) { ret void } +define void @test_tan(ptr %p) { +; CHECK-LABEL: test_tan: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + define void @test_pow(ptr %p, ptr %q) { ; CHECK-LABEL: test_pow: ; CHECK: .save {r4, lr} @@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a) declare half @llvm.powi.f16.i32(half %a, i32 %b) declare half @llvm.sin.f16(half %a) declare half @llvm.cos.f16(half %a) +declare half @llvm.tan.f16(half %a) declare half @llvm.pow.f16(half %a, half %b) declare half @llvm.exp.f16(half %a) declare half @llvm.exp2.f16(half %a) diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 9c01129ff30d8..ae3b8f9920e3b 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 declare half @llvm.cos.f16(half %a) #0 +declare half @llvm.tan.f16(half %a) #0 declare half @llvm.pow.f16(half %a, half %b) #0 declare half @llvm.exp.f16(half %a) #0 declare half @llvm.exp2.f16(half %a) #0 @@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 { ret void } +; CHECK-FP16-LABEL: test_tan: +; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: bl tanf +; CHECK-FP16: vcvtb.f16.f32 +; CHECK-LIBCALL-LABEL: test_tan: +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL: bl tanf +; CHECK-LIBCALL: bl __aeabi_f2h +define void @test_tan(ptr %p) #0 { + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + ; CHECK-FP16-LABEL: test_pow: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 diff --git a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll index 028bb76c3d435..74782d44c7423 100644 --- a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll +++ b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll @@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) { %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a) ret %v2f32 %1 } +; CHECK-LABEL: test_v2f32.tan:{{.*}} +define %v2f32 @test_v2f32.tan(%v2f32 %a) { + ; CHECK: tan + %1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a) + ret %v2f32 %1 +} ; CHECK-LABEL: test_v2f32.pow:{{.*}} define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) { ; CHECK: pow @@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 +declare %v2f32 @llvm.tan.v2f32(%v2f32) #0 declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0 declare %v2f32 @llvm.exp.v2f32(%v2f32) #0 declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0 @@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) { %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a) ret %v4f32 %1 } +; CHECK-LABEL: test_v4f32.tan:{{.*}} +define %v4f32 @test_v4f32.tan(%v4f32 %a) { + ; CHECK: tan + %1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a) + ret %v4f32 %1 +} ; CHECK-LABEL: test_v4f32.pow:{{.*}} define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) { ; CHECK: pow @@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0 declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0 declare %v4f32 @llvm.sin.v4f32(%v4f32) #0 declare %v4f32 @llvm.cos.v4f32(%v4f32) #0 +declare %v4f32 @llvm.tan.v4f32(%v4f32) #0 declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0 declare %v4f32 @llvm.exp.v4f32(%v4f32) #0 declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0 @@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) { %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a) ret %v2f64 %1 } +; CHECK-LABEL: test_v2f64.tan:{{.*}} +define %v2f64 @test_v2f64.tan(%v2f64 %a) { + ; CHECK: tan + %1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a) + ret %v2f64 %1 +} ; CHECK-LABEL: test_v2f64.pow:{{.*}} define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) { ; CHECK: pow @@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0 declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0 declare %v2f64 @llvm.sin.v2f64(%v2f64) #0 declare %v2f64 @llvm.cos.v2f64(%v2f64) #0 +declare %v2f64 @llvm.tan.v2f64(%v2f64) #0 declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0 declare %v2f64 @llvm.exp.v2f64(%v2f64) #0 declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0 diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll index 70a5939865b7b..7f5da36886939 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -41,6 +41,15 @@ define double @cos_d(double %a) { ret double %1 } +declare double @llvm.tan.f64(double %Val) +define double @tan_d(double %a) { +; CHECK-LABEL: tan_d: +; SOFT: {{(bl|b)}} tan +; HARD: b tan + %1 = call double @llvm.tan.f64(double %a) + ret double %1 +} + declare double @llvm.pow.f64(double %Val, double %power) define double @pow_d(double %a, double %b) { ; CHECK-LABEL: pow_d: diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll index b6b891edd0461..94ba9b218a072 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -42,6 +42,15 @@ define float @cos_f(float %a) { ret float %1 } +declare float @llvm.tan.f32(float %Val) +define float @tan_f(float %a) { +; CHECK-LABEL: tan_f: +; SOFT: bl tanf +; HARD: b tanf + %1 = call float @llvm.tan.f32(float %a) + ret float %1 +} + declare float @llvm.pow.f32(float %Val, float %power) define float @pow_f(float %a, float %b) { ; CHECK-LABEL: pow_f: diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index c299b62a4c942..d747da76a45fa 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -288,6 +288,117 @@ entry: ret <2 x double> %0 } +define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) { +; CHECK-LABEL: tan_float32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r4, d9 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov r4, r1, d8 +; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: vmov s18, r5 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s17, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s16, r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) { +; CHECK-LABEL: tan_float16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtt.f32.f16 s0, s16 +; CHECK-NEXT: vmov s16, r0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s17 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s17 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s18 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s18 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s19 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s19 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) { +; CHECK-LABEL: tan_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl tan +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl tan +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) { ; CHECK-LABEL: exp_float32_t: ; CHECK: @ %bb.0: @ %entry