[llvm] [Analysis] isTriviallyVectorizable - add vectorization support for acos/asin/atan and cosh/sinh/tanh intrinsics (PR #106584)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 09:53:01 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/106584
Show the fallback cases in the amdlibm tests where the library doesn't provide a vector variant for that specific op (an illustrative scalar loop is sketched below).
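For context, here is a minimal sketch of the kind of scalar loop these tests exercise. The function name and loop shape are illustrative (modelled on the existing amdlibm-calls.ll cases, not copied from the patch): with acos added to isTriviallyVectorizable, the loop vectorizer can map the call to an amdlibm routine such as @amd_vrs4_acosf where one exists, or fall back to the generic vector intrinsic (e.g. @llvm.acos.v2f32) at widths amdlibm doesn't cover, instead of leaving the loop scalar.

; Illustrative scalar loop (assumed shape, mirroring the amdlibm-calls.ll tests):
; stores acos(float(i)) for i = 0..999 into %varray.
define void @acos_f32_intrinsic_example(ptr nocapture %varray) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %t = trunc i64 %iv to i32
  %conv = sitofp i32 %t to float
  %call = tail call float @llvm.acos.f32(float %conv)
  %gep = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1000
  br i1 %done, label %for.end, label %for.body

for.end:
  ret void
}

declare float @llvm.acos.f32(float)

The updated CHECK lines in the second patch verify exactly this: the amdlibm entry point where the library has one, and the widened @llvm.acos.* intrinsic call as the fallback otherwise.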
From 33ffb6533c54528af359b618571e89b50563e6b4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 29 Aug 2024 17:38:08 +0100
Subject: [PATCH 1/2] [LoopVectorize][X86] amdlibm-calls.ll - add tests showing
failure to vectorize acos/asin/atan and cosh/sinh/tanh intrinsics
---
.../Transforms/LoopVectorize/X86/amdlibm-calls.ll | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 04289d43f40e2f..dd5bf100284e83 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -414,6 +414,7 @@ for.end:
define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -487,7 +488,10 @@ for.end:
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -510,6 +514,7 @@ for.end:
define void @asin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -588,6 +593,7 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -610,6 +616,7 @@ for.end:
define void @atan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -683,6 +690,9 @@ for.end:
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -705,8 +715,10 @@ for.end:
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -754,6 +766,7 @@ for.end:
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
+; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
From 8d660e1da337914e6564212c3adda5246a642aa7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 29 Aug 2024 17:44:37 +0100
Subject: [PATCH 2/2] [Analysis] isTriviallyVectorizable - add vectorization
support for acos/asin/atan and cosh/sinh/tanh intrinsics
---
llvm/lib/Analysis/VectorUtils.cpp | 6 +++++
.../LoopVectorize/X86/amdlibm-calls.ll | 26 +++++++++----------
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cc742ab35f4498..32ce34114b2f50 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -66,9 +66,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
+ case Intrinsic::asin:
+ case Intrinsic::acos:
+ case Intrinsic::atan:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::tan:
+ case Intrinsic::sinh:
+ case Intrinsic::cosh:
+ case Intrinsic::tanh:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index dd5bf100284e83..c051e2f18380bd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -414,7 +414,7 @@ for.end:
define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -488,10 +488,10 @@ for.end:
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
-; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -514,7 +514,7 @@ for.end:
define void @asin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -593,7 +593,7 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
-; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -616,7 +616,7 @@ for.end:
define void @atan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -690,9 +690,9 @@ for.end:
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
-; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
-; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -715,10 +715,10 @@ for.end:
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
-; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -766,7 +766,7 @@ for.end:
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
-; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
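For anyone re-running these checks locally, the test drives opt with the AMD vector library enabled and a forced vector width per check prefix. A hedged sketch of such a RUN line follows; the exact flags and prefixes used by amdlibm-calls.ll may differ, but all of the opt options shown here exist.

; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize \
; RUN:     -force-vector-width=2 -force-vector-interleave=1 -S < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-VF2

Each forced width corresponds to one CHECK-VFn prefix, which is why the VF2/VF4/VF8/VF16 lines above can assert different outcomes for the same source loop.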