[llvm] [CodeGen] Support SLPVectorizer cases of tan across all backends (PR #95517)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 01:52:45 PDT 2024
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/95517
Recognize tan, tanf, and tanl alongside the other libm functions (sin, cos, sqrt, ...) so SLPVectorizer can vectorize calls to them, and add a default f16 type promotion for ISD::FTAN so half-precision tan lowers via the float libcall.
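For context, a minimal sketch of the effect (illustrative IR mirroring the new tan_libm test added to SLPVectorizer/X86/call.ll below; the value names are hypothetical):

  ; Before: two scalar libm calls over adjacent doubles, which previously
  ; stayed scalar because tan was missing from the libm allow-lists.
  %a0 = load double, ptr %a, align 8
  %p1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %p1, align 8
  %t0 = tail call double @tan(double %a0)
  %t1 = tail call double @tan(double %a1)
  store double %t0, ptr %b, align 8
  %p2 = getelementptr inbounds double, ptr %b, i64 1
  store double %t1, ptr %p2, align 8

  ; After SLPVectorizer with this patch: a single vector intrinsic call.
  %v  = load <2 x double>, ptr %a, align 8
  %vt = call <2 x double> @llvm.tan.v2f64(<2 x double> %v)
  store <2 x double> %vt, ptr %b, align 8

On targets without native half support, the new default marks ISD::FTAN as Promote for f16, so an f16 tan is extended to f32, lowered through the tanf libcall, and truncated back; this is what the RISC-V half-intrinsics checks below expect (fcvt.s.h / call tanf / fcvt.h.s).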
From 4e45379ce5d29b4a001171a6c707b9be2b5b3067 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 14 Jun 2024 02:57:16 -0400
Subject: [PATCH] [CodeGen] Support SLPVectorizer cases of tan across all
backends Add a default f16 type promotion
---
.../include/llvm/Analysis/TargetLibraryInfo.h | 3 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 11 +-
llvm/lib/Analysis/ValueTracking.cpp | 4 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +-
llvm/test/CodeGen/RISCV/half-intrinsics.ll | 120 ++++++++++++++++++
.../CodeGen/WebAssembly/simd-unsupported.ll | 16 +++
.../Transforms/LoopVectorize/intrinsic.ll | 54 ++++++++
...ccelerate-vector-functions-inseltpoison.ll | 12 +-
.../AArch64/accelerate-vector-functions.ll | 12 +-
.../test/Transforms/SLPVectorizer/X86/call.ll | 19 +++
10 files changed, 235 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index f5da222d11f55..ce0cc38baf77e 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -415,10 +415,12 @@ class TargetLibraryInfo {
return false;
switch (F) {
default: break;
+ // clang-format off
case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
+ case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
@@ -437,6 +439,7 @@ class TargetLibraryInfo {
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
+ // clang-format on
return true;
}
return false;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7828bdc1f1f43..b1d426830f0da 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -156,14 +156,17 @@ class TargetTransformInfoImplBase {
StringRef Name = F->getName();
// These will all likely lower to a single selection DAG node.
+ // clang-format off
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
Name == "fmin" || Name == "fminf" || Name == "fminl" ||
Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
- Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
- Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "tan" || Name == "tanf" || Name == "tanl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
return false;
-
+ // clang-format on
// These are all likely to be optimized into something smaller.
if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8126d2a1acc27..0a477082a7d12 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3993,6 +3993,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
case LibFunc_cosf:
case LibFunc_cosl:
return Intrinsic::cos;
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ return Intrinsic::tan;
case LibFunc_exp:
case LibFunc_expf:
case LibFunc_expl:
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8240a1fd7e2ff..de534994fa48c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+ ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() {
ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
+ setOperationAction(ISD::FTAN, MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index c493a9b2cb1df..bfc26b0d65980 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) {
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan
ret i1 %1
}
+
+declare half @llvm.tan.f16(half)
+
+define half @tan_f16(half %a) nounwind {
+; RV32IZFH-LABEL: tan_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call tanf
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: tan_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call tanf
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV32IZHINX-LABEL: tan_f16:
+; RV32IZHINX: # %bb.0:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: fcvt.s.h a0, a0
+; RV32IZHINX-NEXT: call tanf
+; RV32IZHINX-NEXT: fcvt.h.s a0, a0
+; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: ret
+;
+; RV64IZHINX-LABEL: tan_f16:
+; RV64IZHINX: # %bb.0:
+; RV64IZHINX-NEXT: addi sp, sp, -16
+; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINX-NEXT: fcvt.s.h a0, a0
+; RV64IZHINX-NEXT: call tanf
+; RV64IZHINX-NEXT: fcvt.h.s a0, a0
+; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINX-NEXT: addi sp, sp, 16
+; RV64IZHINX-NEXT: ret
+;
+; RV32I-LABEL: tan_f16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: call __extendhfsf2
+; RV32I-NEXT: call tanf
+; RV32I-NEXT: call __truncsfhf2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: tan_f16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: call __extendhfsf2
+; RV64I-NEXT: call tanf
+; RV64I-NEXT: call __truncsfhf2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV32IZFHMIN-LABEL: tan_f16:
+; RV32IZFHMIN: # %bb.0:
+; RV32IZFHMIN-NEXT: addi sp, sp, -16
+; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFHMIN-NEXT: call tanf
+; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: ret
+;
+; RV64IZFHMIN-LABEL: tan_f16:
+; RV64IZFHMIN: # %bb.0:
+; RV64IZFHMIN-NEXT: addi sp, sp, -16
+; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFHMIN-NEXT: call tanf
+; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFHMIN-NEXT: addi sp, sp, 16
+; RV64IZFHMIN-NEXT: ret
+;
+; RV32IZHINXMIN-LABEL: tan_f16:
+; RV32IZHINXMIN: # %bb.0:
+; RV32IZHINXMIN-NEXT: addi sp, sp, -16
+; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV32IZHINXMIN-NEXT: call tanf
+; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINXMIN-NEXT: addi sp, sp, 16
+; RV32IZHINXMIN-NEXT: ret
+;
+; RV64IZHINXMIN-LABEL: tan_f16:
+; RV64IZHINXMIN: # %bb.0:
+; RV64IZHINXMIN-NEXT: addi sp, sp, -16
+; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT: call tanf
+; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINXMIN-NEXT: addi sp, sp, 16
+; RV64IZHINXMIN-NEXT: ret
+ %1 = call half @llvm.tan.f16(half %a)
+ ret half %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
index d214a3af5a151..1d6e073271efa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) {
ret <4 x float> %v
}
+; CHECK-LABEL: tan_v4f32:
+; CHECK: call $push[[L:[0-9]+]]=, tanf
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+define <4 x float> @tan_v4f32(<4 x float> %x) {
+ %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+ ret <4 x float> %v
+}
+
; CHECK-LABEL: powi_v4f32:
; CHECK: call $push[[L:[0-9]+]]=, __powisf2
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
@@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) {
ret <2 x double> %v
}
+; CHECK-LABEL: tan_v2f64:
+; CHECK: call $push[[L:[0-9]+]]=, tan
+declare <2 x double> @llvm.tan.v2f64(<2 x double>)
+define <2 x double> @tan_v2f64(<2 x double> %x) {
+ %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x)
+ ret <2 x double> %v
+}
+
; CHECK-LABEL: powi_v2f64:
; CHECK: call $push[[L:[0-9]+]]=, __powidf2
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 0f070347dd4ef..9c910d70807a1 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.cos.f64(double)
+define void @tan_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: llvm.tan.v4f32
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.tan.f32(float %0)
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ store float %call, ptr %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.tan.f32(float)
+
+define void @tan_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: llvm.tan.v4f64
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call double @llvm.tan.f64(double %0)
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+ store double %call, ptr %arrayidx2, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.tan.f64(double)
+
define void @exp_f32(i32 %n, ptr %y, ptr %x) {
; CHECK-LABEL: @exp_f32(
; CHECK: llvm.exp.v4f32
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
index 6db27e597a63f..eae38295ba08c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
-; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
-; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
+; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 24e16deacb3af..5e2dd305f0557 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
-; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
-; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
+; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 4181148a4d829..8835e3b144be6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -6,6 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
declare double @sin(double) nounwind willreturn
declare double @cos(double) nounwind willreturn
+declare double @tan(double) nounwind willreturn
declare double @pow(double, double) nounwind willreturn
declare double @exp2(double) nounwind willreturn
declare double @sqrt(double) nounwind willreturn
@@ -48,6 +49,24 @@ define void @cos_libm(ptr %a, ptr %b) {
ret void
}
+define void @tan_libm(ptr %a, ptr %b) {
+; CHECK-LABEL: @tan_libm(
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
+; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %a0 = load double, ptr %a, align 8
+ %idx1 = getelementptr inbounds double, ptr %a, i64 1
+ %a1 = load double, ptr %idx1, align 8
+ %tan1 = tail call double @tan(double %a0) nounwind readnone
+ %tan2 = tail call double @tan(double %a1) nounwind readnone
+ store double %tan1, ptr %b, align 8
+ %idx2 = getelementptr inbounds double, ptr %b, i64 1
+ store double %tan2, ptr %idx2, align 8
+ ret void
+}
+
define void @pow_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8