[llvm-branch-commits] [llvm] PR for llvm/llvm-project#78725 (PR #79986)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Feb 6 16:46:32 PST 2024


https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79986

>From 8091f1be3dc8c7b7c35452dc97f35b9d27aa6d69 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 26 Jan 2024 11:30:35 +0000
Subject: [PATCH] [LV] Fix handling of interleaving linear args (#78725)

Currently when interleaving vector calls with linear arguments,
the Part is ignored and all vector calls use the initial value
from the first lane of the current iteration.

Fix this to extract from the correct part of the linear vector.

(cherry picked from commit d4c01714239e80d21e441c3886749fc56b743f81)
---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 +++--
 .../AArch64/vector-call-linear-args.ll        | 54 ++++++++++++-------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bbeb5da2cfec3e..ae2fc522ba4002 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -597,13 +597,15 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
     for (const auto &I : enumerate(operands())) {
       // Some intrinsics have a scalar argument - don't replace it with a
       // vector.
-      // Some vectorized function variants may also take a scalar argument,
-      // e.g. linear parameters for pointers.
       Value *Arg;
-      if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
-          (UseIntrinsic &&
-           isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
+      if (UseIntrinsic &&
+          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
         Arg = State.get(I.value(), VPIteration(0, 0));
+      // Some vectorized function variants may also take a scalar argument,
+      // e.g. linear parameters for pointers. This needs to be the scalar value
+      // from the start of the respective part when interleaving.
+      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
+        Arg = State.get(I.value(), VPIteration(Part, 0));
       else
         Arg = State.get(I.value(), Part);
       if (UseIntrinsic &&
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index 29440ca174248f..f60ab5e848dd3a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -21,7 +21,8 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP5:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]])
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]])
+; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP6]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8
@@ -34,8 +35,9 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP33:%.*]] = extractelement <vscale x 2 x ptr> [[TMP31:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP34:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP35:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP47:%.*]] = extractelement <vscale x 2 x i1> [[TMP45:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 2 x ptr> [[TMP32:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP36:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP35]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_OR_NEON_INTERLEAVE:    [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP46:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8
@@ -49,8 +51,9 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; SVE_TF_INTERLEAVE:    [[TMP33:%.*]] = extractelement <vscale x 2 x ptr> [[TMP31:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    [[TMP34:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP35:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP33]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP47:%.*]] = extractelement <vscale x 2 x i1> [[TMP45:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 2 x ptr> [[TMP32:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP36:%.*]] = call <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr [[TMP35]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_TF_INTERLEAVE:    [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP46:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 entry:
@@ -81,7 +84,8 @@ define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP9:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP8]])
-; NEON_INTERLEAVE:    [[TMP10:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP8]])
+; NEON_INTERLEAVE:    [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP7:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP11:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP10]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_vector_linear4
@@ -176,7 +180,8 @@ define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n)
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP4:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]])
-; NEON_INTERLEAVE:    [[TMP8:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]])
+; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP5:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP9:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP8]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
@@ -228,7 +233,9 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly
 ; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP7:%.*]] = extractelement <4 x ptr> [[TMP4:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP8:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]])
-; NEON_INTERLEAVE:    [[TMP9:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]])
+; NEON_INTERLEAVE:    [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP5:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP11:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP9]], ptr [[TMP10]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
@@ -243,8 +250,10 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 4 x ptr> [[TMP31:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP36:%.*]] = extractelement <vscale x 4 x ptr> [[TMP33:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP37:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP38:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP50:%.*]] = extractelement <vscale x 4 x i1> [[TMP48:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP38:%.*]] = extractelement <vscale x 4 x ptr> [[TMP32:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 4 x ptr> [[TMP34:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP40:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_OR_NEON_INTERLEAVE:    [[TMP52:%.*]] = extractelement <vscale x 4 x i1> [[TMP50:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear4_linear8
@@ -260,8 +269,10 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly
 ; SVE_TF_INTERLEAVE:    [[TMP35:%.*]] = extractelement <vscale x 4 x ptr> [[TMP31:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    [[TMP36:%.*]] = extractelement <vscale x 4 x ptr> [[TMP33:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    [[TMP37:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP38:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP50:%.*]] = extractelement <vscale x 4 x i1> [[TMP48:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP38:%.*]] = extractelement <vscale x 4 x ptr> [[TMP32:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 4 x ptr> [[TMP34:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP40:%.*]] = call <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_TF_INTERLEAVE:    [[TMP52:%.*]] = extractelement <vscale x 4 x i1> [[TMP50:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]]
 ;
 entry:
@@ -293,7 +304,8 @@ define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP5:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]])
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]])
+; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP6]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
@@ -343,7 +355,8 @@ define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
 ; NEON_INTERLEAVE:    [[TMP5:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]])
-; NEON_INTERLEAVE:    [[TMP6:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]])
+; NEON_INTERLEAVE:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
+; NEON_INTERLEAVE:    [[TMP7:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP6]])
 ; NEON_INTERLEAVE:    [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
@@ -393,7 +406,8 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n)
 ; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
 ; NEON_INTERLEAVE:    [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6:%.*]], i32 0
 ; NEON_INTERLEAVE:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP8]])
-; NEON_INTERLEAVE:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP8]])
+; NEON_INTERLEAVE:    [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP7:%.*]], i32 0
+; NEON_INTERLEAVE:    call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP9]])
 ; NEON_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
 ;
 ; SVE_OR_NEON-LABEL: define void @test_linear8_return_void
@@ -406,8 +420,9 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n)
 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; SVE_OR_NEON_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 2 x ptr> [[TMP37:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_OR_NEON_INTERLEAVE:    [[TMP45:%.*]] = extractelement <vscale x 2 x i1> [[TMP43:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    [[TMP40:%.*]] = extractelement <vscale x 2 x ptr> [[TMP38:%.*]], i32 0
+; SVE_OR_NEON_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_OR_NEON_INTERLEAVE:    [[TMP46:%.*]] = extractelement <vscale x 2 x i1> [[TMP44:%.*]], i32 0
 ; SVE_OR_NEON_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]]
 ;
 ; SVE_TF-LABEL: define void @test_linear8_return_void
@@ -421,8 +436,9 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n)
 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; SVE_TF_INTERLEAVE:    [[TMP39:%.*]] = extractelement <vscale x 2 x ptr> [[TMP37:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; SVE_TF_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP39]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
-; SVE_TF_INTERLEAVE:    [[TMP45:%.*]] = extractelement <vscale x 2 x i1> [[TMP43:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    [[TMP40:%.*]] = extractelement <vscale x 2 x ptr> [[TMP38:%.*]], i32 0
+; SVE_TF_INTERLEAVE:    call void @vec_goo_linear8_mask_sve(<vscale x 2 x i64> [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2:%.*]])
+; SVE_TF_INTERLEAVE:    [[TMP46:%.*]] = extractelement <vscale x 2 x i1> [[TMP44:%.*]], i32 0
 ; SVE_TF_INTERLEAVE:    call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]]
 ;
 entry:



More information about the llvm-branch-commits mailing list