[llvm] 7ad12a7 - [ARM] Add tan intrinsic lowering (#95439)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 07:35:54 PDT 2024
Author: Farzon Lotfi
Date: 2024-06-14T10:35:50-04:00
New Revision: 7ad12a7c047a421400803eebae4cacc82b27be1d
URL: https://github.com/llvm/llvm-project/commit/7ad12a7c047a421400803eebae4cacc82b27be1d
DIFF: https://github.com/llvm/llvm-project/commit/7ad12a7c047a421400803eebae4cacc82b27be1d.diff
LOG: [ARM] Add tan intrinsic lowering (#95439)
- `ARMISelLowering.cpp` - Add f16 scalar type support, plus NEON and MVE
vector support, for the tan intrinsic
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
llvm/test/CodeGen/ARM/fp16-fullfp16.ll
llvm/test/CodeGen/ARM/fp16-promote.ll
llvm/test/CodeGen/ARM/vfloatintrinsics.ll
llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
llvm/test/CodeGen/Thumb2/mve-fmath.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5a617968307dd..ef3dc87777999 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
@@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FTAN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index e14e598086249..b6ebeaae5eb6d 100644
--- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -56,6 +56,37 @@ L.entry:
declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+define void @test_tan(ptr %X) nounwind {
+
+; CHECK-LABEL: test_tan:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vld1.64
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: vst1.64
+
+L.entry:
+ %0 = load <4 x float>, ptr @A, align 16
+ %1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0)
+ store <4 x float> %1, ptr %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly
+
define void @test_exp(ptr %X) nounwind {
; CHECK-LABEL: test_exp:
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 7381d517505e8..2656cdbb0347e 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -281,6 +281,23 @@ define void @test_cos(ptr %p) {
ret void
}
+define void @test_tan(ptr %p) {
+; CHECK-LABEL: test_tan:
+; CHECK: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: vstr.16 s0, [r4]
+; CHECK-NEXT: pop {r4, pc}
+ %a = load half, ptr %p, align 2
+ %r = call half @llvm.tan.f16(half %a)
+ store half %r, ptr %p
+ ret void
+}
+
define void @test_pow(ptr %p, ptr %q) {
; CHECK-LABEL: test_pow:
; CHECK: .save {r4, lr}
@@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a)
declare half @llvm.powi.f16.i32(half %a, i32 %b)
declare half @llvm.sin.f16(half %a)
declare half @llvm.cos.f16(half %a)
+declare half @llvm.tan.f16(half %a)
declare half @llvm.pow.f16(half %a, half %b)
declare half @llvm.exp.f16(half %a)
declare half @llvm.exp2.f16(half %a)
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 9c01129ff30d8..ae3b8f9920e3b 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.tan.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
@@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 {
ret void
}
+; CHECK-FP16-LABEL: test_tan:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl tanf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_tan:
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl tanf
+; CHECK-LIBCALL: bl __aeabi_f2h
+define void @test_tan(ptr %p) #0 {
+ %a = load half, ptr %p, align 2
+ %r = call half @llvm.tan.f16(half %a)
+ store half %r, ptr %p
+ ret void
+}
+
; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
diff --git a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
index 028bb76c3d435..74782d44c7423 100644
--- a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
@@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) {
%1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
ret %v2f32 %1
}
+; CHECK-LABEL: test_v2f32.tan:{{.*}}
+define %v2f32 @test_v2f32.tan(%v2f32 %a) {
+ ; CHECK: tan
+ %1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
; CHECK-LABEL: test_v2f32.pow:{{.*}}
define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
; CHECK: pow
@@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
+declare %v2f32 @llvm.tan.v2f32(%v2f32) #0
declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
@@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) {
%1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
ret %v4f32 %1
}
+; CHECK-LABEL: test_v4f32.tan:{{.*}}
+define %v4f32 @test_v4f32.tan(%v4f32 %a) {
+ ; CHECK: tan
+ %1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
; CHECK-LABEL: test_v4f32.pow:{{.*}}
define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
; CHECK: pow
@@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
+declare %v4f32 @llvm.tan.v4f32(%v4f32) #0
declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
@@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) {
%1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
ret %v2f64 %1
}
+; CHECK-LABEL: test_v2f64.tan:{{.*}}
+define %v2f64 @test_v2f64.tan(%v2f64 %a) {
+ ; CHECK: tan
+ %1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
; CHECK-LABEL: test_v2f64.pow:{{.*}}
define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
; CHECK: pow
@@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
+declare %v2f64 @llvm.tan.v2f64(%v2f64) #0
declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
index 70a5939865b7b..7f5da36886939 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -41,6 +41,15 @@ define double @cos_d(double %a) {
ret double %1
}
+declare double @llvm.tan.f64(double %Val)
+define double @tan_d(double %a) {
+; CHECK-LABEL: tan_d:
+; SOFT: {{(bl|b)}} tan
+; HARD: b tan
+ %1 = call double @llvm.tan.f64(double %a)
+ ret double %1
+}
+
declare double @llvm.pow.f64(double %Val, double %power)
define double @pow_d(double %a, double %b) {
; CHECK-LABEL: pow_d:
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
index b6b891edd0461..94ba9b218a072 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -42,6 +42,15 @@ define float @cos_f(float %a) {
ret float %1
}
+declare float @llvm.tan.f32(float %Val)
+define float @tan_f(float %a) {
+; CHECK-LABEL: tan_f:
+; SOFT: bl tanf
+; HARD: b tanf
+ %1 = call float @llvm.tan.f32(float %a)
+ ret float %1
+}
+
declare float @llvm.pow.f32(float %Val, float %power)
define float @pow_f(float %a, float %b) {
; CHECK-LABEL: pow_f:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
index c299b62a4c942..d747da76a45fa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -288,6 +288,117 @@ entry:
ret <2 x double> %0
}
+define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
+; CHECK-LABEL: tan_float32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r4, d9
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov r4, r1, d8
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov s18, r5
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
+; CHECK-LABEL: tan_float16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s16
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vcvtt.f32.f16 s0, s16
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s20, s16
+; CHECK-NEXT: vcvtt.f16.f32 s20, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s17
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s21, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s17
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s21, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s18
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s22, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s18
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s22, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s19
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s23, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s19
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s23, s0
+; CHECK-NEXT: vmov q0, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
+; CHECK-LABEL: tan_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl tan
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl tan
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
More information about the llvm-commits mailing list