[llvm] ceb613a - [RISCV] Add full test coverage for acos/asin/atan and cosh/sinh/tanh intrinsics to support #106584
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 06:01:25 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-30T14:01:15+01:00
New Revision: ceb613a8bed218e2c98cd4fad3fd2a4a3217bd77
URL: https://github.com/llvm/llvm-project/commit/ceb613a8bed218e2c98cd4fad3fd2a4a3217bd77
DIFF: https://github.com/llvm/llvm-project/commit/ceb613a8bed218e2c98cd4fad3fd2a4a3217bd77.diff
LOG: [RISCV] Add full test coverage for acos/asin/atan and cosh/sinh/tanh intrinsics to support #106584
Added:
Modified:
llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index 059e4c38b519bd..6fbd05aaedfe5b 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -597,6 +597,690 @@ entry:
ret <4 x float> %vecins.3
}
+declare float @cosf(float) readonly nounwind willreturn
+
+; We can not vectorized cos cosce RISCV has no such instruction.
+define <4 x float> @cos_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @cos_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @cos_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @cosf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @cosf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @cosf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @cosf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.cos.f32(float)
+
+; We can not vectorized cos cosce RISCV has no such instruction.
+define <4 x float> @int_cos_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_cos_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_cos_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.cos.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @acosf(float) readonly nounwind willreturn
+
+; We can not vectorized acos cosce RISCV has no such instruction.
+define <4 x float> @acos_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @acos_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @acos_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @acosf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @acosf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @acosf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @acosf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.acos.f32(float)
+
+; We can not vectorized acos cosce RISCV has no such instruction.
+define <4 x float> @int_acos_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_acos_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_acos_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.acos.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @tanf(float) readonly nounwind willreturn
+
+; We can not vectorized tan tance RISCV has no such instruction.
+define <4 x float> @tan_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @tan_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @tan_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @tanf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @tanf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @tanf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @tanf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.tan.f32(float)
+
+; We can not vectorized tan tance RISCV has no such instruction.
+define <4 x float> @int_tan_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_tan_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_tan_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.tan.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.tan.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.tan.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.tan.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @atanf(float) readonly nounwind willreturn
+
+; We can not vectorized atan tance RISCV has no such instruction.
+define <4 x float> @atan_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @atan_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @atan_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @atanf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @atanf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @atanf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @atanf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.atan.f32(float)
+
+; We can not vectorized atan tance RISCV has no such instruction.
+define <4 x float> @int_atan_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_atan_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_atan_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.atan.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @sinhf(float) readonly nounwind willreturn
+
+; We can not vectorized sinh since RISCV has no such instruction.
+define <4 x float> @sinh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @sinh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @sinh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @sinhf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @sinhf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @sinhf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @sinhf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.sinh.f32(float)
+
+; We can not vectorized sinh since RISCV has no such instruction.
+define <4 x float> @int_sinh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_sinh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.sinh.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @asinhf(float) readonly nounwind willreturn
+
+; We can not vectorized asinh since RISCV has no such instruction.
+define <4 x float> @asinh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @asinh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @asinh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @asinhf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @asinhf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @asinhf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @asinhf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.asinh.f32(float)
+
+; We can not vectorized asinh since RISCV has no such instruction.
+define <4 x float> @int_asinh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_asinh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.asinh.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
declare float @coshf(float) readonly nounwind willreturn
; We can not vectorized cosh since RISCV has no such instruction.
@@ -711,6 +1395,234 @@ entry:
ret <4 x float> %vecins.3
}
+declare float @acoshf(float) readonly nounwind willreturn
+
+; We can not vectorized acosh since RISCV has no such instruction.
+define <4 x float> @acosh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @acosh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @acosh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @acoshf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @acoshf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @acoshf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @acoshf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.acosh.f32(float)
+
+; We can not vectorized acosh since RISCV has no such instruction.
+define <4 x float> @int_acosh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_acosh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.acosh.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @tanhf(float) readonly nounwind willreturn
+
+; We can not vectorized tanh since RISCV has no such instruction.
+define <4 x float> @tanh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @tanh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @tanh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @tanhf(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @tanhf(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @tanhf(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @tanhf(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
+declare float @llvm.tanh.f32(float)
+
+; We can not vectorized tanh since RISCV has no such instruction.
+define <4 x float> @int_tanh_4x(ptr %a) {
+; CHECK-LABEL: define <4 x float> @int_tanh_4x
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+;
+; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+;
+entry:
+ %0 = load <4 x float>, ptr %a, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %1 = tail call fast float @llvm.tanh.f32(float %vecext)
+ %vecins = insertelement <4 x float> undef, float %1, i32 0
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
+ %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
+ %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
+ %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+ ret <4 x float> %vecins.3
+}
+
declare float @atanhf(float) readonly nounwind willreturn
; We can not vectorized atanh since RISCV has no such instruction.
More information about the llvm-commits
mailing list