[llvm] [TLI] Pass replace-with-veclib works with Scalable Vectors. (PR #73642)
Paschalis Mpeis via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 19 04:33:16 PST 2023
https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/73642
>From d412f7467a9b4dfc8ab8d969b45704d058fe6e6f Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 24 Nov 2023 13:44:45 +0000
Subject: [PATCH 1/6] [NFC][TLI] Improve tests for ArmPL and SLEEF Intrinsics.
Auto-generate test `armpl-intrinsics.ll`, and use active lane mask to
have shorter `shufflevector` check lines.
Update scripts now add `@llvm.compiler.used` instead of using the regex:
`@[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]`
---
.../AArch64/sleef-intrinsic-calls-aarch64.ll | 190 +++++++++++-------
1 file changed, 114 insertions(+), 76 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
index 2300ce74996e39..83898374c1c6c5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
@@ -139,8 +139,9 @@ define void @llvm_cos_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_cos_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
; SVE: ret void
;
entry:
@@ -168,8 +169,9 @@ define void @llvm_cos_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_cos_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
; SVE: ret void
;
entry:
@@ -200,8 +202,9 @@ define void @llvm_exp_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]]
; SVE: ret void
;
entry:
@@ -229,8 +232,9 @@ define void @llvm_exp_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
; SVE: ret void
;
entry:
@@ -261,8 +265,9 @@ define void @llvm_exp2_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp2_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
; SVE: ret void
;
entry:
@@ -290,8 +295,9 @@ define void @llvm_exp2_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp2_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
; SVE: ret void
;
entry:
@@ -322,8 +328,9 @@ define void @llvm_exp10_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp10_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
; SVE: ret void
;
entry:
@@ -351,8 +358,9 @@ define void @llvm_exp10_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp10_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
; SVE: ret void
;
entry:
@@ -383,8 +391,9 @@ define void @llvm_fabs_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fabs_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -413,8 +422,9 @@ define void @llvm_fabs_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fabs_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -445,8 +455,9 @@ define void @llvm_floor_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_floor_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -474,8 +485,9 @@ define void @llvm_floor_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_floor_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -506,8 +518,9 @@ define void @llvm_fma_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fma_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]], <vscale x 2 x double> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.fma.f64(double [[CONV:%.*]], double [[CONV]], double [[CONV]])
; SVE: ret void
;
entry:
@@ -535,8 +548,9 @@ define void @llvm_fma_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fma_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]], <vscale x 4 x float> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x float> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]])
; SVE: ret void
;
entry:
@@ -567,8 +581,9 @@ define void @llvm_log_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
; SVE: ret void
;
entry:
@@ -596,8 +611,9 @@ define void @llvm_log_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
; SVE: ret void
;
entry:
@@ -628,8 +644,9 @@ define void @llvm_log10_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
; SVE: ret void
;
entry:
@@ -657,8 +674,9 @@ define void @llvm_log10_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
; SVE: ret void
;
entry:
@@ -689,8 +707,9 @@ define void @llvm_log2_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log2_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]]
; SVE: ret void
;
entry:
@@ -718,8 +737,9 @@ define void @llvm_log2_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log2_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]]
; SVE: ret void
;
entry:
@@ -750,8 +770,9 @@ define void @llvm_maxnum_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_maxnum_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]])
; SVE: ret void
;
entry:
@@ -779,8 +800,9 @@ define void @llvm_maxnum_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_maxnum_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]])
; SVE: ret void
;
entry:
@@ -811,8 +833,9 @@ define void @llvm_minnum_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_minnum_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]])
; SVE: ret void
;
entry:
@@ -840,8 +863,9 @@ define void @llvm_minnum_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_minnum_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]])
; SVE: ret void
;
entry:
@@ -872,8 +896,9 @@ define void @llvm_nearbyint_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_nearbyint_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -901,8 +926,9 @@ define void @llvm_nearbyint_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_nearbyint_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -933,8 +959,9 @@ define void @llvm_pow_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_pow_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR18:[0-9]+]]
; SVE: ret void
;
entry:
@@ -962,8 +989,9 @@ define void @llvm_pow_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_pow_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR19:[0-9]+]]
; SVE: ret void
;
entry:
@@ -994,8 +1022,9 @@ define void @llvm_rint_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_rint_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.rint.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1023,8 +1052,9 @@ define void @llvm_rint_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_rint_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1055,8 +1085,9 @@ define void @llvm_round_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_round_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.round.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1084,8 +1115,9 @@ define void @llvm_round_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_round_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1116,8 +1148,9 @@ define void @llvm_sin_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sin_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR20:[0-9]+]]
; SVE: ret void
;
entry:
@@ -1145,8 +1178,9 @@ define void @llvm_sin_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sin_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR21:[0-9]+]]
; SVE: ret void
;
entry:
@@ -1177,8 +1211,9 @@ define void @llvm_sqrt_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sqrt_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1206,8 +1241,9 @@ define void @llvm_sqrt_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sqrt_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1238,8 +1274,9 @@ define void @llvm_trunc_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_trunc_f64
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]])
; SVE: ret void
;
entry:
@@ -1267,8 +1304,9 @@ define void @llvm_trunc_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_trunc_f32
-; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]])
; SVE: ret void
;
entry:
>From fe84777007eb8bb0359429e47f715390d2d44bd6 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Mon, 27 Nov 2023 17:36:29 +0000
Subject: [PATCH 2/6] Add `simplifycfg` pass and `noalias` to ensure tail
folding.
`noalias` attribute was added only to the `%in.ptr` parameter of the
ArmPL Intrinsics.
---
.../AArch64/sleef-intrinsic-calls-aarch64.ll | 190 +++++++-----------
1 file changed, 76 insertions(+), 114 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
index 83898374c1c6c5..2300ce74996e39 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
@@ -139,9 +139,8 @@ define void @llvm_cos_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_cos_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -169,9 +168,8 @@ define void @llvm_cos_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_cos_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -202,9 +200,8 @@ define void @llvm_exp_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -232,9 +229,8 @@ define void @llvm_exp_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -265,9 +261,8 @@ define void @llvm_exp2_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp2_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -295,9 +290,8 @@ define void @llvm_exp2_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp2_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -328,9 +322,8 @@ define void @llvm_exp10_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp10_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -358,9 +351,8 @@ define void @llvm_exp10_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_exp10_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -391,9 +383,8 @@ define void @llvm_fabs_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fabs_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -422,9 +413,8 @@ define void @llvm_fabs_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fabs_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -455,9 +445,8 @@ define void @llvm_floor_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_floor_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -485,9 +474,8 @@ define void @llvm_floor_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_floor_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -518,9 +506,8 @@ define void @llvm_fma_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fma_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.fma.f64(double [[CONV:%.*]], double [[CONV]], double [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]], <vscale x 2 x double> [[TMP17]])
; SVE: ret void
;
entry:
@@ -548,9 +535,8 @@ define void @llvm_fma_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_fma_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x float> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]], <vscale x 4 x float> [[TMP17]])
; SVE: ret void
;
entry:
@@ -581,9 +567,8 @@ define void @llvm_log_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -611,9 +596,8 @@ define void @llvm_log_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -644,9 +628,8 @@ define void @llvm_log10_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -674,9 +657,8 @@ define void @llvm_log10_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -707,9 +689,8 @@ define void @llvm_log2_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log2_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -737,9 +718,8 @@ define void @llvm_log2_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log2_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -770,9 +750,8 @@ define void @llvm_maxnum_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_maxnum_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]])
; SVE: ret void
;
entry:
@@ -800,9 +779,8 @@ define void @llvm_maxnum_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_maxnum_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]])
; SVE: ret void
;
entry:
@@ -833,9 +811,8 @@ define void @llvm_minnum_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_minnum_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]])
; SVE: ret void
;
entry:
@@ -863,9 +840,8 @@ define void @llvm_minnum_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_minnum_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]])
; SVE: ret void
;
entry:
@@ -896,9 +872,8 @@ define void @llvm_nearbyint_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_nearbyint_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -926,9 +901,8 @@ define void @llvm_nearbyint_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_nearbyint_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -959,9 +933,8 @@ define void @llvm_pow_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_pow_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR18:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -989,9 +962,8 @@ define void @llvm_pow_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_pow_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR19:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -1022,9 +994,8 @@ define void @llvm_rint_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_rint_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.rint.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1052,9 +1023,8 @@ define void @llvm_rint_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_rint_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1085,9 +1055,8 @@ define void @llvm_round_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_round_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.round.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1115,9 +1084,8 @@ define void @llvm_round_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_round_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1148,9 +1116,8 @@ define void @llvm_sin_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sin_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR20:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -1178,9 +1145,8 @@ define void @llvm_sin_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sin_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR21:[0-9]+]]
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
; SVE: ret void
;
entry:
@@ -1211,9 +1177,8 @@ define void @llvm_sqrt_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sqrt_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1241,9 +1206,8 @@ define void @llvm_sqrt_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_sqrt_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1274,9 +1238,8 @@ define void @llvm_trunc_f64(double* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_trunc_f64
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
; SVE: ret void
;
entry:
@@ -1304,9 +1267,8 @@ define void @llvm_trunc_f32(float* %varray) {
; NEON: ret void
;
; SVE-LABEL: define void @llvm_trunc_f32
-; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
-; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
-; SVE: [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]])
+; SVE-SAME: (ptr [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
; SVE: ret void
;
entry:
>From 52ce4f2d655a60cc2eb5dd5b0b01fc925a38c273 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Mon, 27 Nov 2023 16:55:17 +0000
Subject: [PATCH 3/6] [TLI] Pass replace-with-veclib works with Scalable
Vectors.
The pass uses the Masked variant of TLI method when the Intrinsic
operates on Scalable Vectors and it fails to find a non-Masked variant.
---
llvm/lib/Analysis/VFABIDemangling.cpp | 2 +-
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 24 ++++++-------
.../replace-intrinsics-with-veclib-armpl.ll | 36 +++++++++----------
...e-intrinsics-with-veclib-sleef-scalable.ll | 35 +++++++++---------
4 files changed, 50 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp
index 22fc52070015c5..426f98c0c6284a 100644
--- a/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -126,7 +126,7 @@ static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
return ParseRet::None;
}
-/// The function looks for the following stringt at the beginning of
+/// The function looks for the following string at the beginning of
/// the input string `ParseString`:
///
/// <token> <number>
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 36c91b7fa97e46..d31a793556dfde 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -105,6 +105,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// all vector operands have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
SmallVector<Type *> ScalarTypes;
+ bool MayBeMasked = false;
for (auto Arg : enumerate(CI.args())) {
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
@@ -121,17 +122,13 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
return false;
}
ElementCount NumElements = VectorArgTy->getElementCount();
- if (NumElements.isScalable()) {
- // The current implementation does not support
- // scalable vectors.
- return false;
- }
- if (VF.isNonZero() && VF != NumElements) {
- // The different arguments differ in vector size.
+ if (NumElements.isScalable())
+ MayBeMasked = true;
+
+ // The different arguments differ in vector size.
+ if (VF.isNonZero() && VF != NumElements)
return false;
- } else {
- VF = NumElements;
- }
+ VF = NumElements;
ScalarTypes.push_back(VectorArgTy->getElementType());
}
}
@@ -152,11 +149,14 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
return false;
}
+ // Assume it has a mask when that is a possibility and has no mapping for
+ // a Non-Masked variant.
+ const bool IsMasked =
+ MayBeMasked && !TLI.getVectorMappingInfo(ScalarName, VF, false);
// Try to find the mapping for the scalar version of this intrinsic
// and the exact vector width of the call operands in the
// TargetLibraryInfo.
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
-
+ StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF, IsMasked);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
<< ScalarName << "` and vector width " << VF << ".\n");
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index 18431ae021f976..633cb220f52464 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
-; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -40,7 +40,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -50,7 +50,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -85,7 +85,7 @@ define <4 x float> @llvm_sin_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -95,7 +95,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
@@ -130,7 +130,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -140,7 +140,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -175,7 +175,7 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -185,7 +185,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -220,7 +220,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -230,7 +230,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -265,7 +265,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -275,7 +275,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -310,7 +310,7 @@ define <4 x float> @llvm_log2_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -320,7 +320,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -355,7 +355,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -365,7 +365,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #
define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -380,7 +380,7 @@ declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4
;
; There is a bug in the replace-with-veclib pass, and for intrinsics which take
; more than one arguments, but has just one overloaded type, it incorrectly
-; reconstructs the scalar name, for pow specificlly it is searching for:
+; reconstructs the scalar name, for pow specifically it is searching for:
; llvm.pow.f64.f64 and llvm.pow.f32.f32
;
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
index 8b06c41bcb1a6d..969945590a0a18 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -5,6 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
; NOTE: The existing TLI mappings are not used since the -replace-with-veclib pass is broken for scalable vectors.
+;.
+; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf], section "llvm.metadata"
+;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
@@ -43,7 +46,7 @@ define <vscale x 4 x float> @llvm_copysign_vscale_f32(<vscale x 4 x float> %mag,
define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_cos_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -52,7 +55,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_cos_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -61,7 +64,7 @@ define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -70,7 +73,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -79,7 +82,7 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp2_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -88,7 +91,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp2_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -97,7 +100,7 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp10_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -106,7 +109,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp10_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -169,7 +172,7 @@ define <vscale x 4 x float> @llvm_fma_vscale_f32(<vscale x 4 x float> %a, <vscal
define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -178,7 +181,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -187,7 +190,7 @@ define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log10_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -196,7 +199,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log10_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -205,7 +208,7 @@ define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log2_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -214,7 +217,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log2_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -331,7 +334,7 @@ define <vscale x 4 x float> @llvm_round_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_sin_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -340,7 +343,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_sin_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
>From 690d36dc8a97d8ac82f51d6726eba439bb0c7b3f Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Tue, 12 Dec 2023 15:48:35 +0000
Subject: [PATCH 4/6] Use createFunctionType to correctly replace veclib calls.
Split replaceWithTLIFunction method into two methods.
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 203 ++++++++++--------
.../replace-intrinsics-with-veclib-armpl.ll | 32 +--
...e-intrinsics-with-veclib-sleef-scalable.ll | 32 +--
3 files changed, 149 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index d31a793556dfde..ddcc55a8e52c40 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -15,15 +15,19 @@
#include "llvm/CodeGen/ReplaceWithVeclib.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <optional>
using namespace llvm;
@@ -38,138 +42,166 @@ STATISTIC(NumTLIFuncDeclAdded,
STATISTIC(NumFuncUsedAdded,
"Number of functions added to `llvm.compiler.used`");
-static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
- Module *M = CI.getModule();
-
- Function *OldFunc = CI.getCalledFunction();
-
- // Check if the vector library function is already declared in this module,
- // otherwise insert it.
+/// Returns a vector Function that it adds to the Module \p M. When an \p
+/// OptOldFunc is given, it copies its attributes to the newly created Function.
+Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
+ std::optional<Function *> OptOldFunc,
+ const StringRef TLIName) {
Function *TLIFunc = M->getFunction(TLIName);
if (!TLIFunc) {
- TLIFunc = Function::Create(OldFunc->getFunctionType(),
- Function::ExternalLinkage, TLIName, *M);
- TLIFunc->copyAttributesFrom(OldFunc);
+ TLIFunc =
+ Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
+ if (OptOldFunc)
+ TLIFunc->copyAttributesFrom(*OptOldFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
<< TLIName << "` of type `" << *(TLIFunc->getType())
<< "` to module.\n");
++NumTLIFuncDeclAdded;
-
- // Add the freshly created function to llvm.compiler.used,
- // similar to as it is done in InjectTLIMappings
+ // Add the freshly created function to llvm.compiler.used, similar to as it
+ // is done in InjectTLIMappings
appendToCompilerUsed(*M, {TLIFunc});
-
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
<< "` to `@llvm.compiler.used`.\n");
++NumFuncUsedAdded;
}
+ return TLIFunc;
+}
- // Replace the call to the vector intrinsic with a call
- // to the corresponding function from the vector library.
+/// Replace the call to the vector intrinsic ( \p OldFunc ) with a call to the
+/// corresponding function from the vector library ( \p TLIFunc ).
+static bool replaceWithTLIFunction(const Module *M, CallInst &CI,
+ const ElementCount &VecVF, Function *OldFunc,
+ Function *TLIFunc, FunctionType *VecFTy,
+ bool IsMasked) {
IRBuilder<> IRBuilder(&CI);
SmallVector<Value *> Args(CI.args());
+ if (IsMasked) {
+ if (Args.size() == VecFTy->getNumParams())
+ static_assert(true && "mask was already in place");
+
+ auto *MaskTy = VectorType::get(Type::getInt1Ty(M->getContext()), VecVF);
+ Args.push_back(Constant::getAllOnesValue(MaskTy));
+ }
+
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
- assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+ assert(VecFTy == TLIFunc->getFunctionType() &&
"Expecting function types to be identical");
CI.replaceAllUsesWith(Replacement);
- if (isa<FPMathOperator>(Replacement)) {
- // Preserve fast math flags for FP math.
+ // Preserve fast math flags for FP math.
+ if (isa<FPMathOperator>(Replacement))
Replacement->copyFastMathFlags(&CI);
- }
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
- << OldFunc->getName() << "` with call to `" << TLIName
- << "`.\n");
+ << OldFunc->getName() << "` with call to `"
+ << TLIFunc->getName() << "`.\n");
++NumCallsReplaced;
return true;
}
+/// Utility method to get the VecDesc, depending on whether there is a TLI
+/// mapping, either with or without a mask.
+static std::optional<const VecDesc *> getVecDesc(const TargetLibraryInfo &TLI,
+ const StringRef &ScalarName,
+ const ElementCount &VF) {
+ const VecDesc *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true);
+ const VecDesc *VDNoMask = TLI.getVectorMappingInfo(ScalarName, VF, false);
+ // Invalid when there are both variants (ie masked and unmasked), or none
+ if ((VDMasked == nullptr) == (VDNoMask == nullptr))
+ return std::nullopt;
+
+ return {VDMasked != nullptr ? VDMasked : VDNoMask};
+}
+
+/// Returns whether it is able to replace a call to the intrinsic \p CI with a
+/// TLI mapped call.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
CallInst &CI) {
- if (!CI.getCalledFunction()) {
+ if (!CI.getCalledFunction())
return false;
- }
auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
- if (IntrinsicID == Intrinsic::not_intrinsic) {
- // Replacement is only performed for intrinsic functions
+ // Replacement is only performed for intrinsic functions
+ if (IntrinsicID == Intrinsic::not_intrinsic)
return false;
- }
- // Convert vector arguments to scalar type and check that
- // all vector operands have identical vector width.
+ // Convert vector arguments to scalar type and check that all vector operands
+ // have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
SmallVector<Type *> ScalarTypes;
- bool MayBeMasked = false;
for (auto Arg : enumerate(CI.args())) {
- auto *ArgType = Arg.value()->getType();
- // Vector calls to intrinsics can still have
- // scalar operands for specific arguments.
+ auto *ArgTy = Arg.value()->getType();
if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
- ScalarTypes.push_back(ArgType);
- } else {
- // The argument in this place should be a vector if
- // this is a call to a vector intrinsic.
- auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
- if (!VectorArgTy) {
- // The argument is not a vector, do not perform
- // the replacement.
- return false;
- }
- ElementCount NumElements = VectorArgTy->getElementCount();
- if (NumElements.isScalable())
- MayBeMasked = true;
-
- // The different arguments differ in vector size.
- if (VF.isNonZero() && VF != NumElements)
+ ScalarTypes.push_back(ArgTy);
+ } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+ ScalarTypes.push_back(ArgTy->getScalarType());
+ // Disallow vector arguments with different VFs. When processing the first
+ // vector argument, store it's VF, and for the rest ensure that they match
+ // it.
+ if (VF.isZero())
+ VF = VectorArgTy->getElementCount();
+ else if (VF != VectorArgTy->getElementCount())
return false;
- VF = NumElements;
- ScalarTypes.push_back(VectorArgTy->getElementType());
+ } else {
+ // enters when it is supposed to be a vector argument but it isn't.
+ return false;
}
}
- // Try to reconstruct the name for the scalar version of this
- // intrinsic using the intrinsic ID and the argument types
- // converted to scalar above.
- std::string ScalarName;
- if (Intrinsic::isOverloaded(IntrinsicID)) {
- ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
- } else {
- ScalarName = Intrinsic::getName(IntrinsicID).str();
- }
+ // Try to reconstruct the name for the scalar version of this intrinsic using
+ // the intrinsic ID and the argument types converted to scalar above.
+ std::string ScalarName =
+ (Intrinsic::isOverloaded(IntrinsicID)
+ ? Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule())
+ : Intrinsic::getName(IntrinsicID).str());
- if (!TLI.isFunctionVectorizable(ScalarName)) {
- // The TargetLibraryInfo does not contain a vectorized version of
- // the scalar function.
+ // The TargetLibraryInfo does not contain a vectorized version of the scalar
+ // function.
+ if (!TLI.isFunctionVectorizable(ScalarName))
return false;
- }
- // Assume it has a mask when that is a possibility and has no mapping for
- // a Non-Masked variant.
- const bool IsMasked =
- MayBeMasked && !TLI.getVectorMappingInfo(ScalarName, VF, false);
- // Try to find the mapping for the scalar version of this intrinsic
- // and the exact vector width of the call operands in the
- // TargetLibraryInfo.
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF, IsMasked);
+ auto OptVD = getVecDesc(TLI, ScalarName, VF);
+ if (!OptVD)
+ return false;
+
+ const VecDesc *VD = *OptVD;
+ // Try to find the mapping for the scalar version of this intrinsic and the
+ // exact vector width of the call operands in the TargetLibraryInfo.
+ StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF, VD->isMasked());
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
<< ScalarName << "` and vector width " << VF << ".\n");
- if (!TLIName.empty()) {
- // Found the correct mapping in the TargetLibraryInfo,
- // replace the call to the intrinsic with a call to
- // the vector library function.
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
- << "`.\n");
- return replaceWithTLIFunction(CI, TLIName);
- }
+ // TLI failed to find a correct mapping.
+ if (TLIName.empty())
+ return false;
- return false;
+ // Find the vector Function and replace the call to the intrinsic with a call
+ // to the vector library function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+ << "`.\n");
+
+ Type *ScalarRetTy = CI.getType()->getScalarType();
+ FunctionType *ScalarFTy = FunctionType::get(ScalarRetTy, ScalarTypes, false);
+ const std::string MangledName = VD->getVectorFunctionABIVariantString();
+ auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
+ if (!OptInfo)
+ return false;
+
+ // get the vector FunctionType
+ Module *M = CI.getModule();
+ auto OptFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
+ if (!OptFTy)
+ return false;
+
+ Function *OldFunc = CI.getCalledFunction();
+ FunctionType *VectorFTy = *OptFTy;
+ Function *TLIFunc = getTLIFunction(M, VectorFTy, OldFunc, TLIName);
+ return replaceWithTLIFunction(M, CI, OptInfo->Shape.VF, OldFunc, TLIFunc,
+ VectorFTy, VD->isMasked());
}
static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
@@ -185,9 +217,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
}
// Erase the calls to the intrinsics that have been replaced
// with calls to the vector library.
- for (auto *CI : ReplacedCalls) {
+ for (auto *CI : ReplacedCalls)
CI->eraseFromParent();
- }
return Changed;
}
@@ -207,10 +238,10 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F,
PA.preserve<DemandedBitsAnalysis>();
PA.preserve<OptimizationRemarkEmitterAnalysis>();
return PA;
- } else {
- // The pass did not replace any calls, hence it preserves all analyses.
- return PreservedAnalyses::all();
}
+
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index 633cb220f52464..d41870ec6e7915 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -40,7 +40,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -50,7 +50,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -85,7 +85,7 @@ define <4 x float> @llvm_sin_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -95,7 +95,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
@@ -130,7 +130,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -140,7 +140,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -175,7 +175,7 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -185,7 +185,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -220,7 +220,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -230,7 +230,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -265,7 +265,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -275,7 +275,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -310,7 +310,7 @@ define <4 x float> @llvm_log2_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -320,7 +320,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0
define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -355,7 +355,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -365,7 +365,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #
define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
index 969945590a0a18..baf16f83a3e240 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -46,7 +46,7 @@ define <vscale x 4 x float> @llvm_copysign_vscale_f32(<vscale x 4 x float> %mag,
define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_cos_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -55,7 +55,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_cos_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -64,7 +64,7 @@ define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -73,7 +73,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -82,7 +82,7 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp2_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -91,7 +91,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp2_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -100,7 +100,7 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_exp10_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -109,7 +109,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_exp10_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -172,7 +172,7 @@ define <vscale x 4 x float> @llvm_fma_vscale_f32(<vscale x 4 x float> %a, <vscal
define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -181,7 +181,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -190,7 +190,7 @@ define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log10_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -199,7 +199,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log10_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -208,7 +208,7 @@ define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_log2_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -217,7 +217,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_log2_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -334,7 +334,7 @@ define <vscale x 4 x float> @llvm_round_vscale_f32(<vscale x 4 x float> %in) {
define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_sin_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -343,7 +343,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) {
; CHECK-LABEL: @llvm_sin_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
>From 02b0e21aa97ae72834438223e68edc65ae8a23f1 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Wed, 13 Dec 2023 18:08:46 +0000
Subject: [PATCH 5/6] getVecDesc now prioritizes masked variant
Also further cleanup to address reviewers.
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 85 ++++++++-----------
...e-intrinsics-with-veclib-sleef-scalable.ll | 2 -
2 files changed, 36 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index ddcc55a8e52c40..4ea163e4eaafbb 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -69,20 +69,20 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
return TLIFunc;
}
-/// Replace the call to the vector intrinsic ( \p OldFunc ) with a call to the
-/// corresponding function from the vector library ( \p TLIFunc ).
-static bool replaceWithTLIFunction(const Module *M, CallInst &CI,
- const ElementCount &VecVF, Function *OldFunc,
- Function *TLIFunc, FunctionType *VecFTy,
- bool IsMasked) {
+/// Replace the call to the vector intrinsic ( \p FuncToReplace ) with a call to
+/// the corresponding function from the vector library ( \p TLIFunc ).
+static void replaceWithTLIFunction(CallInst &CI, VFInfo &Info,
+ Function *TLIFunc, FunctionType *VecFTy) {
IRBuilder<> IRBuilder(&CI);
SmallVector<Value *> Args(CI.args());
- if (IsMasked) {
+ if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
if (Args.size() == VecFTy->getNumParams())
static_assert(true && "mask was already in place");
- auto *MaskTy = VectorType::get(Type::getInt1Ty(M->getContext()), VecVF);
- Args.push_back(Constant::getAllOnesValue(MaskTy));
+ auto *MaskTy =
+ VectorType::get(Type::getInt1Ty(CI.getContext()), Info.Shape.VF);
+ Args.insert(Args.begin() + OptMaskpos.value(),
+ Constant::getAllOnesValue(MaskTy));
}
// Preserve the operand bundles.
@@ -95,26 +95,18 @@ static bool replaceWithTLIFunction(const Module *M, CallInst &CI,
// Preserve fast math flags for FP math.
if (isa<FPMathOperator>(Replacement))
Replacement->copyFastMathFlags(&CI);
-
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
- << OldFunc->getName() << "` with call to `"
- << TLIFunc->getName() << "`.\n");
- ++NumCallsReplaced;
- return true;
}
-/// Utility method to get the VecDesc, depending on whether there is a TLI
-/// mapping, either with or without a mask.
+/// Utility method to get the VecDesc, depending on whether there is such a TLI
+/// mapping, prioritizing a masked version.
static std::optional<const VecDesc *> getVecDesc(const TargetLibraryInfo &TLI,
const StringRef &ScalarName,
const ElementCount &VF) {
- const VecDesc *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true);
- const VecDesc *VDNoMask = TLI.getVectorMappingInfo(ScalarName, VF, false);
- // Invalid when there are both variants (ie masked and unmasked), or none
- if ((VDMasked == nullptr) == (VDNoMask == nullptr))
- return std::nullopt;
-
- return {VDMasked != nullptr ? VDMasked : VDNoMask};
+ if (auto *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true))
+ return VDMasked;
+ if (auto *VDNoMask = TLI.getVectorMappingInfo(ScalarName, VF, false))
+ return VDNoMask;
+ return std::nullopt;
}
/// Returns whether it is able to replace a call to the intrinsic \p CI with a
@@ -146,10 +138,9 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
VF = VectorArgTy->getElementCount();
else if (VF != VectorArgTy->getElementCount())
return false;
- } else {
+ } else
// enters when it is supposed to be a vector argument but it isn't.
return false;
- }
}
// Try to reconstruct the name for the scalar version of this intrinsic using
@@ -164,26 +155,19 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
if (!TLI.isFunctionVectorizable(ScalarName))
return false;
+ // Try to find the mapping for the scalar version of this intrinsic and the
+ // exact vector width of the call operands in the TargetLibraryInfo.
auto OptVD = getVecDesc(TLI, ScalarName, VF);
if (!OptVD)
return false;
const VecDesc *VD = *OptVD;
- // Try to find the mapping for the scalar version of this intrinsic and the
- // exact vector width of the call operands in the TargetLibraryInfo.
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF, VD->isMasked());
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
- << ScalarName << "` and vector width " << VF << ".\n");
-
- // TLI failed to find a correct mapping.
- if (TLIName.empty())
- return false;
-
- // Find the vector Function and replace the call to the intrinsic with a call
- // to the vector library function.
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
- << "`.\n");
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName
+ << "` and vector width " << VF << " to: `"
+ << VD->getVectorFnName() << "`.\n");
+ // Replace the call to the intrinsic with a call to the vector library
+ // function.
Type *ScalarRetTy = CI.getType()->getScalarType();
FunctionType *ScalarFTy = FunctionType::get(ScalarRetTy, ScalarTypes, false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
@@ -191,17 +175,20 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
if (!OptInfo)
return false;
- // get the vector FunctionType
- Module *M = CI.getModule();
- auto OptFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
- if (!OptFTy)
+ FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
+ if (!VectorFTy)
return false;
- Function *OldFunc = CI.getCalledFunction();
- FunctionType *VectorFTy = *OptFTy;
- Function *TLIFunc = getTLIFunction(M, VectorFTy, OldFunc, TLIName);
- return replaceWithTLIFunction(M, CI, OptInfo->Shape.VF, OldFunc, TLIFunc,
- VectorFTy, VD->isMasked());
+ Function *FuncToReplace = CI.getCalledFunction();
+ Function *TLIFunc = getTLIFunction(CI.getModule(), VectorFTy, FuncToReplace,
+ VD->getVectorFnName());
+ replaceWithTLIFunction(CI, *OptInfo, TLIFunc, VectorFTy);
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << FuncToReplace->getName() << "` with call to `"
+ << TLIFunc->getName() << "`.\n");
+ ++NumCallsReplaced;
+ return true;
}
static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
index baf16f83a3e240..c2ff6014bc6944 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -3,8 +3,6 @@
target triple = "aarch64-unknown-linux-gnu"
-; NOTE: The existing TLI mappings are not used since the -replace-with-veclib pass is broken for scalable vectors.
-
;.
; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf], section "llvm.metadata"
;.
>From 5b2e317962d0239d46fc0be2c76556b04cfcd0eb Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 15 Dec 2023 12:08:37 +0000
Subject: [PATCH 6/6] Addressing review.
getTLIFunction is no longer an optional. It accepts a pointer for
ScalarFunc
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 41 +++++++++++++-------------
1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 4ea163e4eaafbb..05ceb6d01bc983 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -43,16 +43,16 @@ STATISTIC(NumFuncUsedAdded,
"Number of functions added to `llvm.compiler.used`");
/// Returns a vector Function that it adds to the Module \p M. When an \p
-/// OptOldFunc is given, it copies its attributes to the newly created Function.
+/// ScalarFunc is not null, it copies its attributes to the newly created
+/// Function.
Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
- std::optional<Function *> OptOldFunc,
- const StringRef TLIName) {
+ Function *ScalarFunc, const StringRef TLIName) {
Function *TLIFunc = M->getFunction(TLIName);
if (!TLIFunc) {
TLIFunc =
Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
- if (OptOldFunc)
- TLIFunc->copyAttributesFrom(*OptOldFunc);
+ if (ScalarFunc)
+ TLIFunc->copyAttributesFrom(ScalarFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
<< TLIName << "` of type `" << *(TLIFunc->getType())
@@ -60,7 +60,7 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
++NumTLIFuncDeclAdded;
// Add the freshly created function to llvm.compiler.used, similar to as it
- // is done in InjectTLIMappings
+ // is done in InjectTLIMappings.
appendToCompilerUsed(*M, {TLIFunc});
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
<< "` to `@llvm.compiler.used`.\n");
@@ -72,11 +72,11 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
/// Replace the call to the vector intrinsic ( \p FuncToReplace ) with a call to
/// the corresponding function from the vector library ( \p TLIFunc ).
static void replaceWithTLIFunction(CallInst &CI, VFInfo &Info,
- Function *TLIFunc, FunctionType *VecFTy) {
+ Function *TLIVecFunc) {
IRBuilder<> IRBuilder(&CI);
SmallVector<Value *> Args(CI.args());
if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
- if (Args.size() == VecFTy->getNumParams())
+ if (Args.size() == TLIVecFunc->getFunctionType()->getNumParams())
static_assert(true && "mask was already in place");
auto *MaskTy =
@@ -88,9 +88,7 @@ static void replaceWithTLIFunction(CallInst &CI, VFInfo &Info,
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
- CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
- assert(VecFTy == TLIFunc->getFunctionType() &&
- "Expecting function types to be identical");
+ CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
CI.replaceAllUsesWith(Replacement);
// Preserve fast math flags for FP math.
if (isa<FPMathOperator>(Replacement))
@@ -102,10 +100,10 @@ static void replaceWithTLIFunction(CallInst &CI, VFInfo &Info,
static std::optional<const VecDesc *> getVecDesc(const TargetLibraryInfo &TLI,
const StringRef &ScalarName,
const ElementCount &VF) {
- if (auto *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true))
- return VDMasked;
if (auto *VDNoMask = TLI.getVectorMappingInfo(ScalarName, VF, false))
return VDNoMask;
+ if (auto *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true))
+ return VDMasked;
return std::nullopt;
}
@@ -117,20 +115,20 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
return false;
auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
- // Replacement is only performed for intrinsic functions
+ // Replacement is only performed for intrinsic functions.
if (IntrinsicID == Intrinsic::not_intrinsic)
return false;
// Convert vector arguments to scalar type and check that all vector operands
// have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
- SmallVector<Type *> ScalarTypes;
+ SmallVector<Type *> ScalarArgTypes;
for (auto Arg : enumerate(CI.args())) {
auto *ArgTy = Arg.value()->getType();
if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
- ScalarTypes.push_back(ArgTy);
+ ScalarArgTypes.push_back(ArgTy);
} else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
- ScalarTypes.push_back(ArgTy->getScalarType());
+ ScalarArgTypes.push_back(ArgTy->getScalarType());
// Disallow vector arguments with different VFs. When processing the first
// vector argument, store it's VF, and for the rest ensure that they match
// it.
@@ -139,7 +137,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
else if (VF != VectorArgTy->getElementCount())
return false;
} else
- // enters when it is supposed to be a vector argument but it isn't.
+ // Exit when it is supposed to be a vector argument but it isn't.
return false;
}
@@ -147,7 +145,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// the intrinsic ID and the argument types converted to scalar above.
std::string ScalarName =
(Intrinsic::isOverloaded(IntrinsicID)
- ? Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule())
+ ? Intrinsic::getName(IntrinsicID, ScalarArgTypes, CI.getModule())
: Intrinsic::getName(IntrinsicID).str());
// The TargetLibraryInfo does not contain a vectorized version of the scalar
@@ -169,7 +167,8 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Replace the call to the intrinsic with a call to the vector library
// function.
Type *ScalarRetTy = CI.getType()->getScalarType();
- FunctionType *ScalarFTy = FunctionType::get(ScalarRetTy, ScalarTypes, false);
+ FunctionType *ScalarFTy =
+ FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
if (!OptInfo)
@@ -182,7 +181,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
Function *FuncToReplace = CI.getCalledFunction();
Function *TLIFunc = getTLIFunction(CI.getModule(), VectorFTy, FuncToReplace,
VD->getVectorFnName());
- replaceWithTLIFunction(CI, *OptInfo, TLIFunc, VectorFTy);
+ replaceWithTLIFunction(CI, *OptInfo, TLIFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
<< FuncToReplace->getName() << "` with call to `"
More information about the llvm-commits
mailing list