[llvm] 50a1ab1 - [LAA] Don't assume libcalls with output/input pointers can be vectorized (#108980)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 08:06:00 PDT 2024


Author: Benjamin Maxwell
Date: 2024-09-23T16:05:55+01:00
New Revision: 50a1ab12abbe948e6d3f8418f11bfa1951c8d19e

URL: https://github.com/llvm/llvm-project/commit/50a1ab12abbe948e6d3f8418f11bfa1951c8d19e
DIFF: https://github.com/llvm/llvm-project/commit/50a1ab12abbe948e6d3f8418f11bfa1951c8d19e.diff

LOG: [LAA] Don't assume libcalls with output/input pointers can be vectorized (#108980)

LoopAccessAnalysis currently does not check/track aliasing from the
output pointers, but assumes vectorizing library calls with a mapping is
safe.

This can result in incorrect codegen if something like the following is
vectorized:

```
for(int i=0; i<N; i++) {
  // No aliasing between input and output pointers detected.
  sincos(cos_out[0], sin_out+i, cos_out+i);
}
```

Where for VF >= 2 `cos_out[1]` to `cos_out[VF-1]` is the cosine of the
original value of `cos_out[0]` not the updated value.

Added: 
    

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
    llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
    llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 980f142f113265..3f189724763d47 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2449,13 +2449,20 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
         continue;
 
       // If this is a load, save it. If this instruction can read from memory
-      // but is not a load, then we quit. Notice that we don't handle function
-      // calls that read or write.
+      // but is not a load, we only allow it if it's a call to a function with a
+      // vector mapping and no pointer arguments.
       if (I.mayReadFromMemory()) {
-        // If the function has an explicit vectorized counterpart, we can safely
-        // assume that it can be vectorized.
+        auto hasPointerArgs = [](CallBase *CB) {
+          return any_of(CB->args(), [](Value const *Arg) {
+            return Arg->getType()->isPointerTy();
+          });
+        };
+
+        // If the function has an explicit vectorized counterpart, and does not
+        // take output/input pointers, we can safely assume that it can be
+        // vectorized.
         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
-            !VFDatabase::getMappings(*Call).empty())
+            !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
           continue;
 
         auto *Ld = dyn_cast<LoadInst>(&I);

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 498f2059ffb0c4..7797c0bce0baa4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -54,9 +54,9 @@ for.end:                                 ; preds = %for.body, %entry
 
 define void @vec_ptr(i64 %N, ptr noalias %a, ptr readnone %b) {
 ; CHECK-LABEL: @vec_ptr
-; CHECK: vector.body:
-; CHECK: %[[LOAD:.*]] = load <vscale x 2 x ptr>, ptr
-; CHECK: call <vscale x 2 x i64> @bar_vec(<vscale x 2 x ptr> %[[LOAD]])
+; CHECK: for.body:
+; CHECK: %[[LOAD:.*]] = load ptr, ptr
+; CHECK: call i64 @bar(ptr %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
   br i1 %cmp7, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
index d9cc630482fc80..41ccb3c404dd76 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
@@ -2902,35 +2902,36 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
+; FIXME: Re-enable modf[f] vectorization once aliasing issues due to output
+; pointers have been resolved.
+
 declare double @modf(double, ptr)
 declare float @modff(float, ptr)
 
 define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @modf_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @modf_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+; SLEEF-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @modf_f64
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    [[TMP17:%.*]] = call <vscale x 2 x double> @_ZGVsNxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]])
-; SLEEF-SVE-NOPRED:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]]
+; SLEEF-SVE-NOPRED:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @modf_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @modf_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f64
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]]
+; ARMPL-SVE-NOPRED:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 entry:
   br label %for.body
@@ -2954,29 +2955,27 @@ for.cond.cleanup:
 define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @modf_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @modf_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
+; SLEEF-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @modf_f32
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    [[TMP17:%.*]] = call <vscale x 4 x float> @_ZGVsNxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]])
-; SLEEF-SVE-NOPRED:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]]
+; SLEEF-SVE-NOPRED:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @modf_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @modf_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f32
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]]
+; ARMPL-SVE-NOPRED:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
 ;
 entry:
   br label %for.body
@@ -3276,35 +3275,36 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
+; FIXME: Re-enable sincos[f] vectorization once aliasing issues with output
+; pointers have been resolved.
+
 declare void @sincos(double, ptr, ptr)
 declare void @sincosf(float, ptr, ptr)
 
 define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @sincos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; SLEEF-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @sincos_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR6:[0-9]+]]
+; SLEEF-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @sincos_f64
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    call void @_ZGVsNxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]])
-; SLEEF-SVE-NOPRED:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]]
+; SLEEF-SVE-NOPRED:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @sincos_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; ARMPL-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @sincos_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f64
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]]
+; ARMPL-SVE-NOPRED:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 entry:
   br label %for.body
@@ -3327,29 +3327,27 @@ for.cond.cleanup:
 define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @sincos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; SLEEF-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @sincos_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR7:[0-9]+]]
+; SLEEF-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @sincos_f32
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    call void @_ZGVsNxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]])
-; SLEEF-SVE-NOPRED:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]]
+; SLEEF-SVE-NOPRED:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @sincos_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; ARMPL-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @sincos_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f32
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    call void @armpl_svsincos_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]]
+; ARMPL-SVE-NOPRED:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 entry:
   br label %for.body
@@ -3369,35 +3367,36 @@ for.cond.cleanup:
   ret void
 }
 
+; FIXME: Re-enable sincospi[f] vectorization once aliasing issues with output
+; pointers have been resolved.
+
 declare void @sincospi(double, ptr, ptr)
 declare void @sincospif(float, ptr, ptr)
 
 define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @sincospi_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; SLEEF-NEON:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @sincospi_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR8:[0-9]+]]
+; SLEEF-SVE:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @sincospi_f64
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    call void @_ZGVsNxvl8l8_sincospi(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]])
-; SLEEF-SVE-NOPRED:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]]
+; SLEEF-SVE-NOPRED:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @sincospi_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; ARMPL-NEON:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @sincospi_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincospi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f64
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    call void @armpl_svsincospi_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]]
+; ARMPL-SVE-NOPRED:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 entry:
   br label %for.body
@@ -3420,29 +3419,27 @@ for.cond.cleanup:
 define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; SLEEF-NEON-LABEL: define void @sincospi_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; SLEEF-NEON:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-LABEL: define void @sincospi_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR9:[0-9]+]]
+; SLEEF-SVE:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; SLEEF-SVE-NOPRED-LABEL: define void @sincospi_f32
 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE-NOPRED:    call void @_ZGVsNxvl4l4_sincospif(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]])
-; SLEEF-SVE-NOPRED:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]]
+; SLEEF-SVE-NOPRED:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-NEON-LABEL: define void @sincospi_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; ARMPL-NEON:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-LABEL: define void @sincospi_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincospi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f32
 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE-NOPRED:    call void @armpl_svsincospi_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; ARMPL-SVE-NOPRED:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]]
+; ARMPL-SVE-NOPRED:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
 ;
 entry:
   br label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index f60ab5e848dd3a..29904a7822131b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -13,48 +13,27 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON-LABEL: define void @test_linear8
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
-; NEON:    [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1:%.*]], i32 0
-; NEON:    [[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2]])
 ; NEON:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_linear8
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP5:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]])
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP6]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_OR_NEON:    [[TMP14:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP15:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr [[TMP14]])
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_OR_NEON_INTERLEAVE:    [[TMP33:%.*]] = extractelement <vscale x 2 x ptr> [[TMP31:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP34:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 2 x ptr> [[TMP32:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP36:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP35]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP46:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_TF:    [[TMP20:%.*]] = extractelement <vscale x 2 x ptr> [[TMP19:%.*]], i32 0
-; SVE_TF:    [[TMP21:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP20]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF:    [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP24:%.*]], i32 0
-; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; SVE_TF_INTERLEAVE:    [[TMP33:%.*]] = extractelement <vscale x 2 x ptr> [[TMP31:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP34:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 2 x ptr> [[TMP32:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP36:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP35]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP46:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -76,35 +55,27 @@ for.cond.cleanup:
 define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
 ; NEON-LABEL: define void @test_vector_linear4
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
-; NEON:    [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0
-; NEON:    [[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4]])
 ; NEON:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP9:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP8]])
-; NEON_INTERLEAVE:    [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP7:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP11:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP10]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_vector_linear4
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP16:%.*]] = extractelement <vscale x 4 x ptr> [[TMP15:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP17:%.*]] = call <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP16]])
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_vector_linear4
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_vector_linear4
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -132,9 +103,7 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP4:%.*]] = call i64 @foo(ptr [[TMP2:%.*]]) #[[ATTR2:[0-9]+]]
-; NEON_INTERLEAVE:    [[TMP5:%.*]] = call i64 @foo(ptr [[TMP3:%.*]]) #[[ATTR2]]
-; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
+; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
@@ -142,15 +111,15 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8_bad_stride
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -172,35 +141,27 @@ for.cond.cleanup:
 define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON-LABEL: define void @test_linear16_wide_stride
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
-; NEON:    [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0
-; NEON:    [[TMP4:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP3]])
 ; NEON:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP4:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]])
-; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP5:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP9:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP8]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[TMP14:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP16:%.*]] = call <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr [[TMP15]])
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
 ;
 ; SVE_TF-LABEL: define void @test_linear16_wide_stride
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]]
+; SVE_TF:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
 ;
 entry:
   br label %for.body
@@ -223,57 +184,27 @@ for.cond.cleanup:
 define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
 ; NEON-LABEL: define void @test_linear4_linear8
 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
-; NEON:    [[TMP3:%.*]] = extractelement <4 x ptr> [[TMP1:%.*]], i32 0
-; NEON:    [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0
-; NEON:    [[TMP5:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP3]], ptr [[TMP4]])
 ; NEON:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP7:%.*]] = extractelement <4 x ptr> [[TMP4:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP8:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]])
-; NEON_INTERLEAVE:    [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP5:%.*]], i32 0
-; NEON_INTERLEAVE:    [[TMP11:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP9]], ptr [[TMP10]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP15:%.*]] = extractelement <vscale x 4 x ptr> [[TMP13:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP16:%.*]] = extractelement <vscale x 4 x ptr> [[TMP14:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP17:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP15]], ptr [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 4 x ptr> [[TMP31:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP36:%.*]] = extractelement <vscale x 4 x ptr> [[TMP33:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP37:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP38:%.*]] = extractelement <vscale x 4 x ptr> [[TMP32:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 4 x ptr> [[TMP34:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[TMP40:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP52:%.*]] = extractelement <vscale x 4 x i1> [[TMP50:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear4_linear8
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[TMP21:%.*]] = extractelement <vscale x 4 x ptr> [[TMP19:%.*]], i32 0
-; SVE_TF:    [[TMP22:%.*]] = extractelement <vscale x 4 x ptr> [[TMP20:%.*]], i32 0
-; SVE_TF:    [[TMP23:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP21]], ptr [[TMP22]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF:    [[TMP27:%.*]] = extractelement <vscale x 4 x i1> [[TMP26:%.*]], i32 0
-; SVE_TF:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear4_linear8
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 4 x ptr> [[TMP31:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP36:%.*]] = extractelement <vscale x 4 x ptr> [[TMP33:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP37:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP38:%.*]] = extractelement <vscale x 4 x ptr> [[TMP32:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 4 x ptr> [[TMP34:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[TMP40:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP52:%.*]] = extractelement <vscale x 4 x i1> [[TMP50:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -310,21 +241,21 @@ define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP14:%.*]] = extractelement <vscale x 4 x i32> [[TMP13:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP15:%.*]] = call <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32 [[TMP14]])
+; SVE_OR_NEON:    [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
+; SVE_OR_NEON:    [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32 [[TMP13]])
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear3_non_ptr
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -361,21 +292,21 @@ define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP14:%.*]] = extractelement <vscale x 4 x i32> [[TMP13:%.*]], i32 0
-; SVE_OR_NEON:    [[TMP15:%.*]] = call <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32 [[TMP14]])
+; SVE_OR_NEON:    [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
+; SVE_OR_NEON:    [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32 [[TMP13]])
 ; SVE_OR_NEON:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE_TF:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -398,48 +329,27 @@ for.cond.cleanup:
 define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) {
 ; NEON-LABEL: define void @test_linear8_return_void
 ; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
-; NEON:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0
-; NEON:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP4]])
 ; NEON:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
-; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6:%.*]], i32 0
-; NEON_INTERLEAVE:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP8]])
-; NEON_INTERLEAVE:    [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP7:%.*]], i32 0
-; NEON_INTERLEAVE:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP9]])
 ; NEON_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8_return_void
 ; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON:    [[TMP16:%.*]] = extractelement <vscale x 2 x ptr> [[TMP15:%.*]], i32 0
-; SVE_OR_NEON:    call void @vec_goo_linear8_nomask_sve(<vscale x 2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP16]])
 ; SVE_OR_NEON:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]]
 ;
 ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_OR_NEON_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 2 x ptr> [[TMP37:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP40:%.*]] = extractelement <vscale x 2 x ptr> [[TMP38:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP46:%.*]] = extractelement <vscale x 2 x i1> [[TMP44:%.*]], i32 0
-; SVE_OR_NEON_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE_OR_NEON_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8_return_void
 ; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF:    [[TMP22:%.*]] = extractelement <vscale x 2 x ptr> [[TMP21:%.*]], i32 0
-; SVE_TF:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF:    [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23:%.*]], i32 0
-; SVE_TF:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE_TF:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_return_void
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; SVE_TF_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 2 x ptr> [[TMP37:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP40:%.*]] = extractelement <vscale x 2 x ptr> [[TMP38:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP46:%.*]] = extractelement <vscale x 2 x i1> [[TMP44:%.*]], i32 0
-; SVE_TF_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE_TF_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 entry:
   br label %for.body
@@ -458,6 +368,9 @@ for.cond.cleanup:
   ret void
 }
 
+; Note: Vectorizing pointer arguments is currently disabled as LAA cannot detect
+; aliasing from output/input pointers.
+
 declare i64 @foo(ptr)
 declare i32 @baz(i32, ptr)
 declare i32 @quux(ptr, ptr)


        


More information about the llvm-commits mailing list