[llvm-branch-commits] [llvm] [LAA] Detect cross-iteration WAW when writing to the same pointer (PR #187802)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Mar 20 14:47:56 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Andrei Elovikov (eas)

<details>
<summary>Changes</summary>

Fixes https://github.com/llvm/llvm-project/issues/187402.

---

Patch is 22.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187802.diff


3 Files Affected:

- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+39-12) 
- (added) llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll (+413) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll (+12-28) 


``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5f4f305506d40..202665ff6bded 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -872,7 +872,7 @@ class AccessAnalysis {
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
-  void buildDependenceSets();
+  void buildDependenceSets(const MemoryDepChecker &DepChecker);
 
   /// Initial processing of memory accesses determined that we need to
   /// perform dependency checking.
@@ -1520,7 +1520,16 @@ bool AccessAnalysis::canCheckPtrAtRT(
   return CanDoRTIfNeeded;
 }
 
-void AccessAnalysis::buildDependenceSets() {
+static bool isInvariant(Value *V, const Loop *TheLoop, ScalarEvolution *SE) {
+  if (TheLoop->isLoopInvariant(V))
+    return true;
+  if (!SE->isSCEVable(V->getType()))
+    return false;
+  const SCEV *S = SE->getSCEV(V);
+  return SE->isLoopInvariant(S, TheLoop);
+}
+
+void AccessAnalysis::buildDependenceSets(const MemoryDepChecker &DepChecker) {
   // We process the set twice: first we process read-write pointers, last we
   // process read-only pointers. This allows us to skip dependence tests for
   // read-only pointers.
@@ -1602,7 +1611,31 @@ void AccessAnalysis::buildDependenceSets() {
           // this is a read only check other writes for conflicts (but only if
           // there is no other write to the ptr - this is an optimization to
           // catch "a[i] = a[i] + " without having to do a dependence check).
-          if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
+          //
+          // If there are multiple writes into the same pointer we need to make
+          // sure that there are no cross-iteration dependencies between those
+          // writes to avoid the following scenario:
+          //
+          //   code:
+          //     if (RT_COND0) *p = x;
+          //     if (RT_COND1) *p = y;
+          //
+          //   execution:
+          //     Iter0     |  Iter1
+          //    no store   |   *p = 2
+          //     *p = 1    |  no store
+          //
+          // Scalar loop would leave `*p == 2`, yet two vectorized scatters
+          // would result in `*p == 1` which is wrong.
+          //
+          // NOTE: Known invariant stores are handled separately in both this
+          // file and LoopVectorizationLegality to support the case when
+          // reduction wasn't completely transformed into SSA form.
+          bool MultipleNonInvariantStoresToPtrExist =
+              DepChecker.getOrderForAccess(Ptr, true).size() > 1 &&
+              !::isInvariant(Ptr, TheLoop, PSE.getSE());
+          if ((IsWrite || IsReadOnlyPtr) &&
+              (AliasSetHasWrite || MultipleNonInvariantStoresToPtrExist)) {
             CheckDeps.push_back(Access);
             IsRTCheckAnalysisNeeded = true;
           }
@@ -2775,14 +2808,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
   // If we write (or read-write) to a single destination and there are no other
   // reads in this loop then is it safe to vectorize: the vectorized stores
   // preserve ordering via replication or order-preserving @llvm.masked.scatter.
-  if (NumReadWrites == 1 && NumReads == 0) {
+  if (NumReadWrites == 1 && NumReads == 0 && Stores.size() == 1) {
     LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
     return true;
   }
 
   // Build dependence sets and check whether we need a runtime pointer bounds
   // check.
-  Accesses.buildDependenceSets();
+  Accesses.buildDependenceSets(getDepChecker());
 
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
@@ -2955,13 +2988,7 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
 }
 
 bool LoopAccessInfo::isInvariant(Value *V) const {
-  auto *SE = PSE->getSE();
-  if (TheLoop->isLoopInvariant(V))
-    return true;
-  if (!SE->isSCEVable(V->getType()))
-    return false;
-  const SCEV *S = SE->getSCEV(V);
-  return SE->isLoopInvariant(S, TheLoop);
+  return ::isInvariant(V, TheLoop, PSE->getSE());
 }
 
 /// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
new file mode 100644
index 0000000000000..bdfee12db5282
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<access-info>' -disable-output < %s -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+
+; Could be statically known conflict happens, fine for vectorization because of
+; the ordered replicated store/scatter semantics.
+define void @waw_no_mask(ptr %p, i64 %stride, i64 %n) {
+; CHECK-LABEL: 'waw_no_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = and i64 %iv, u0xffff0
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  store i64 %iv, ptr %gep
+  store i64 %iv.next, ptr %gep
+
+  %exitcond = icmp slt i64 %iv.next, %n
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Could be statically known conflict happens, unsafe to vectorize.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask(ptr %p, i64 %stride, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = and i64 %iv, u0xffff0
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, %n
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Same as @waw_no_mask but with run-time strided access, so can be speculated `%stride != 0`.
+define void @waw_no_mask_unknown_stride(ptr %p, i64 %stride, i64 %n) {
+; CHECK-LABEL: 'waw_no_mask_unknown_stride'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  store i64 %iv, ptr %gep
+  store i64 %iv.next, ptr %gep
+
+  %exitcond = icmp slt i64 %iv.next, %n
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Same as @waw_mask but with run-time strided access, so can be speculated `%stride != 0`.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask_unknown_stride(ptr %p, i64 %stride, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask_unknown_stride'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Safe to vectorize.
+define void @no_cross_iter_dependency(ptr %p, i8 %a, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'no_cross_iter_dependency'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %a.zext = zext i8 %a to i64
+  %stride = add i64 %a.zext, 1 ; known non-zero
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Safe to vectorize.
+define void @const_stride(ptr %p, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'const_stride'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, 5
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+define void @indirect_single_store(ptr noalias %p, i64 %n) {
+; CHECK-LABEL: 'indirect_single_store'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+
+  %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+  %gep = load ptr, ptr %gep.ld
+
+  store i64 %iv, ptr %gep
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+define void @indirect_no_mask(ptr noalias %p, i64 %n) {
+; CHECK-LABEL: 'indirect_no_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+
+  %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+  %gep = load ptr, ptr %gep.ld
+
+  store i64 %iv, ptr %gep
+  store i64 %iv.next, ptr %gep
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+define void @indirect_mask(ptr noalias %p, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'indirect_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT:            store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+
+  %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+  %gep = load ptr, ptr %gep.ld
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index fabab210fb850..3ea068440ce22 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -162,35 +162,19 @@ exit:
 define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2, ptr %p3, i64 %N) {
 ; CHECK-LABEL: @store_to_addr_generated_from_invariant_addr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P0:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1:%.*]], i64 [[IV]]
+; CHECK-NEXT:    store ptr [[P0:%.*]], ptr [[ARRAYIDX11]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/187802


More information about the llvm-branch-commits mailing list