[llvm] [CodeGen] Support vectors across all backends (PR #95518)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 02:20:16 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-webassembly
Author: Farzon Lotfi (farzonl)
<details>
<summary>Changes</summary>
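Add a default f16 type promotion for `ISD::FTAN` (so a scalar `half` tan is lowered through `tanf`), and add `ISD::FTAN` to the operations that default to Expand in `TargetLoweringBase::initActions()`.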
---
Full diff: https://github.com/llvm/llvm-project/pull/95518.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/TargetLoweringBase.cpp (+2-1)
- (modified) llvm/test/CodeGen/RISCV/half-intrinsics.ll (+120)
- (modified) llvm/test/CodeGen/WebAssembly/simd-unsupported.ll (+16)
- (modified) llvm/test/Transforms/LoopVectorize/intrinsic.ll (+54)
``````````diff
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8240a1fd7e2ff..de534994fa48c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+ ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() {
ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
+ setOperationAction(ISD::FTAN, MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index c493a9b2cb1df..bfc26b0d65980 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) {
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan
ret i1 %1
}
+
+declare half @llvm.tan.f16(half)
+
+define half @tan_f16(half %a) nounwind {
+; RV32IZFH-LABEL: tan_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call tanf
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: tan_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call tanf
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV32IZHINX-LABEL: tan_f16:
+; RV32IZHINX: # %bb.0:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: fcvt.s.h a0, a0
+; RV32IZHINX-NEXT: call tanf
+; RV32IZHINX-NEXT: fcvt.h.s a0, a0
+; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: ret
+;
+; RV64IZHINX-LABEL: tan_f16:
+; RV64IZHINX: # %bb.0:
+; RV64IZHINX-NEXT: addi sp, sp, -16
+; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINX-NEXT: fcvt.s.h a0, a0
+; RV64IZHINX-NEXT: call tanf
+; RV64IZHINX-NEXT: fcvt.h.s a0, a0
+; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINX-NEXT: addi sp, sp, 16
+; RV64IZHINX-NEXT: ret
+;
+; RV32I-LABEL: tan_f16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: call __extendhfsf2
+; RV32I-NEXT: call tanf
+; RV32I-NEXT: call __truncsfhf2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: tan_f16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: call __extendhfsf2
+; RV64I-NEXT: call tanf
+; RV64I-NEXT: call __truncsfhf2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV32IZFHMIN-LABEL: tan_f16:
+; RV32IZFHMIN: # %bb.0:
+; RV32IZFHMIN-NEXT: addi sp, sp, -16
+; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFHMIN-NEXT: call tanf
+; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: ret
+;
+; RV64IZFHMIN-LABEL: tan_f16:
+; RV64IZFHMIN: # %bb.0:
+; RV64IZFHMIN-NEXT: addi sp, sp, -16
+; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFHMIN-NEXT: call tanf
+; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFHMIN-NEXT: addi sp, sp, 16
+; RV64IZFHMIN-NEXT: ret
+;
+; RV32IZHINXMIN-LABEL: tan_f16:
+; RV32IZHINXMIN: # %bb.0:
+; RV32IZHINXMIN-NEXT: addi sp, sp, -16
+; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV32IZHINXMIN-NEXT: call tanf
+; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINXMIN-NEXT: addi sp, sp, 16
+; RV32IZHINXMIN-NEXT: ret
+;
+; RV64IZHINXMIN-LABEL: tan_f16:
+; RV64IZHINXMIN: # %bb.0:
+; RV64IZHINXMIN-NEXT: addi sp, sp, -16
+; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT: call tanf
+; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINXMIN-NEXT: addi sp, sp, 16
+; RV64IZHINXMIN-NEXT: ret
+ %1 = call half @llvm.tan.f16(half %a)
+ ret half %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
index d214a3af5a151..1d6e073271efa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) {
ret <4 x float> %v
}
+; CHECK-LABEL: tan_v4f32:
+; CHECK: call $push[[L:[0-9]+]]=, tanf
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+define <4 x float> @tan_v4f32(<4 x float> %x) {
+ %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+ ret <4 x float> %v
+}
+
; CHECK-LABEL: powi_v4f32:
; CHECK: call $push[[L:[0-9]+]]=, __powisf2
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
@@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) {
ret <2 x double> %v
}
+; CHECK-LABEL: tan_v2f64:
+; CHECK: call $push[[L:[0-9]+]]=, tan
+declare <2 x double> @llvm.tan.v2f64(<2 x double>)
+define <2 x double> @tan_v2f64(<2 x double> %x) {
+ %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x)
+ ret <2 x double> %v
+}
+
; CHECK-LABEL: powi_v2f64:
; CHECK: call $push[[L:[0-9]+]]=, __powidf2
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 0f070347dd4ef..9c910d70807a1 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.cos.f64(double)
+define void @tan_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: llvm.tan.v4f32
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.tan.f32(float %0)
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ store float %call, ptr %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.tan.f32(float)
+
+define void @tan_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: llvm.tan.v4f64
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call double @llvm.tan.f64(double %0)
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+ store double %call, ptr %arrayidx2, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.tan.f64(double)
+
define void @exp_f32(i32 %n, ptr %y, ptr %x) {
; CHECK-LABEL: @exp_f32(
; CHECK: llvm.exp.v4f32
``````````
</details>
https://github.com/llvm/llvm-project/pull/95518
More information about the llvm-commits mailing list