[llvm] c4b5cb0 - [AArch64] Add accelerate test coverage for acos/asin/atan and cosh/sinh/tanh intrinsics to support #106584

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 30 02:58:49 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-30T10:58:31+01:00
New Revision: c4b5cb0f31227074d423b2db378dfbc486a5550e

URL: https://github.com/llvm/llvm-project/commit/c4b5cb0f31227074d423b2db378dfbc486a5550e
DIFF: https://github.com/llvm/llvm-project/commit/c4b5cb0f31227074d423b2db378dfbc486a5550e.diff

LOG: [AArch64] Add accelerate test coverage for acos/asin/atan and cosh/sinh/tanh intrinsics to support #106584

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
index eae38295ba08cf..809059034c7f98 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
@@ -611,6 +611,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_asin_4x(ptr %a) {
+; CHECK-LABEL: @int_asin_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_asin_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.asin.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @acosf(float) readonly nounwind willreturn
 define <4 x float> @acos_4x(ptr %a) {
 ; CHECK-LABEL: @acos_4x(
@@ -652,6 +692,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_acos_4x(ptr %a) {
+; CHECK-LABEL: @int_acos_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_acos_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.acos.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @atanf(float) readonly nounwind willreturn
 define <4 x float> @atan_4x(ptr %a) {
 ; CHECK-LABEL: @atan_4x(
@@ -693,6 +773,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_atan_4x(ptr %a) {
+; CHECK-LABEL: @int_atan_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_atan_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.atan.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @sinhf(float) readonly nounwind willreturn
 define <4 x float> @sinh_4x(ptr %a) {
 ; CHECK-LABEL: @sinh_4x(
@@ -734,6 +854,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_sinh_4x(ptr %a) {
+; CHECK-LABEL: @int_sinh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_sinh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @coshf(float) readonly nounwind willreturn
 define <4 x float> @cosh_4x(ptr %a) {
 ; CHECK-LABEL: @cosh_4x(
@@ -775,6 +935,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_cosh_4x(ptr %a) {
+; CHECK-LABEL: @int_cosh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_cosh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @tanhf(float) readonly nounwind willreturn
 define <4 x float> @tanh_4x(ptr %a) {
 ; CHECK-LABEL: @tanh_4x(
@@ -816,6 +1016,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_tanh_4x(ptr %a) {
+; CHECK-LABEL: @int_tanh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_tanh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
+  %vecins = insertelement <4 x float> poison, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @asinhf(float) readonly nounwind willreturn
 define <4 x float> @asinh_4x(ptr %a) {
 ; CHECK-LABEL: @asinh_4x(

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 5e2dd305f05576..36633a1053b14f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -611,6 +611,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_asin_4x(ptr %a) {
+; CHECK-LABEL: @int_asin_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_asin_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.asin.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @acosf(float) readonly nounwind willreturn
 define <4 x float> @acos_4x(ptr %a) {
 ; CHECK-LABEL: @acos_4x(
@@ -652,6 +692,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_acos_4x(ptr %a) {
+; CHECK-LABEL: @int_acos_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_acos_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.acos.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @atanf(float) readonly nounwind willreturn
 define <4 x float> @atan_4x(ptr %a) {
 ; CHECK-LABEL: @atan_4x(
@@ -693,6 +773,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_atan_4x(ptr %a) {
+; CHECK-LABEL: @int_atan_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_atan_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.atan.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @sinhf(float) readonly nounwind willreturn
 define <4 x float> @sinh_4x(ptr %a) {
 ; CHECK-LABEL: @sinh_4x(
@@ -734,6 +854,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_sinh_4x(ptr %a) {
+; CHECK-LABEL: @int_sinh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_sinh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @coshf(float) readonly nounwind willreturn
 define <4 x float> @cosh_4x(ptr %a) {
 ; CHECK-LABEL: @cosh_4x(
@@ -775,6 +935,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_cosh_4x(ptr %a) {
+; CHECK-LABEL: @int_cosh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_cosh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @tanhf(float) readonly nounwind willreturn
 define <4 x float> @tanh_4x(ptr %a) {
 ; CHECK-LABEL: @tanh_4x(
@@ -816,6 +1016,46 @@ entry:
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
+define <4 x float> @int_tanh_4x(ptr %a) {
+; CHECK-LABEL: @int_tanh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+; NOACCELERATE-LABEL: @int_tanh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, ptr %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
 declare float @asinhf(float) readonly nounwind willreturn
 define <4 x float> @asinh_4x(ptr %a) {
 ; CHECK-LABEL: @asinh_4x(


        


More information about the llvm-commits mailing list