[llvm-branch-commits] [llvm] [LAA] Detect cross-iteration WAW when writing to the same pointer (PR #187802)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 20 14:47:56 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Andrei Elovikov (eas)
<details>
<summary>Changes</summary>
Fixes https://github.com/llvm/llvm-project/issues/187402.
---
Patch is 22.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187802.diff
3 Files Affected:
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+39-12)
- (added) llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll (+413)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll (+12-28)
``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5f4f305506d40..202665ff6bded 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -872,7 +872,7 @@ class AccessAnalysis {
/// Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
- void buildDependenceSets();
+ void buildDependenceSets(const MemoryDepChecker &DepChecker);
/// Initial processing of memory accesses determined that we need to
/// perform dependency checking.
@@ -1520,7 +1520,16 @@ bool AccessAnalysis::canCheckPtrAtRT(
return CanDoRTIfNeeded;
}
-void AccessAnalysis::buildDependenceSets() {
+static bool isInvariant(Value *V, const Loop *TheLoop, ScalarEvolution *SE) {
+ if (TheLoop->isLoopInvariant(V))
+ return true;
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ const SCEV *S = SE->getSCEV(V);
+ return SE->isLoopInvariant(S, TheLoop);
+}
+
+void AccessAnalysis::buildDependenceSets(const MemoryDepChecker &DepChecker) {
// We process the set twice: first we process read-write pointers, last we
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
@@ -1602,7 +1611,31 @@ void AccessAnalysis::buildDependenceSets() {
// this is a read only check other writes for conflicts (but only if
// there is no other write to the ptr - this is an optimization to
// catch "a[i] = a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
+ //
+ // If there are multiple writes into the same pointer we need to make
+ // sure that there are no cross-iteration dependencies between those
+ // writes to avoid the following scenario:
+ //
+ // code:
+ // if (RT_COND0) *p = x;
+ // if (RT_COND1) *p = y;
+ //
+ // execution:
+ // Iter0 | Iter1
+ // no store | *p = 2
+ // *p = 1 | no store
+ //
+ // Scalar loop would leave `*p == 2`, yet two vectorized scatters
+ // would result in `*p == 1` which is wrong.
+ //
+ // NOTE: Known invariant stores are handled separately in both this
+ // file and LoopVectorizationLegality to support the case when
+ // reduction wasn't completely transformed into SSA form.
+ bool MultipleNonInvariantStoresToPtrExist =
+ DepChecker.getOrderForAccess(Ptr, true).size() > 1 &&
+ !::isInvariant(Ptr, TheLoop, PSE.getSE());
+ if ((IsWrite || IsReadOnlyPtr) &&
+ (AliasSetHasWrite || MultipleNonInvariantStoresToPtrExist)) {
CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
@@ -2775,14 +2808,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// If we write (or read-write) to a single destination and there are no other
// reads in this loop then is it safe to vectorize: the vectorized stores
// preserve ordering via replication or order-preserving @llvm.masked.scatter.
- if (NumReadWrites == 1 && NumReads == 0) {
+ if (NumReadWrites == 1 && NumReads == 0 && Stores.size() == 1) {
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
return true;
}
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
- Accesses.buildDependenceSets();
+ Accesses.buildDependenceSets(getDepChecker());
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
@@ -2955,13 +2988,7 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
}
bool LoopAccessInfo::isInvariant(Value *V) const {
- auto *SE = PSE->getSE();
- if (TheLoop->isLoopInvariant(V))
- return true;
- if (!SE->isSCEVable(V->getType()))
- return false;
- const SCEV *S = SE->getSCEV(V);
- return SE->isLoopInvariant(S, TheLoop);
+ return ::isInvariant(V, TheLoop, PSE->getSE());
}
/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
new file mode 100644
index 0000000000000..bdfee12db5282
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<access-info>' -disable-output < %s -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+
+; Could be statically known conflict happens, fine for vectorization because of
+; the ordered replicated store/scatter semantics.
+define void @waw_no_mask(ptr %p, i64 %stride, i64 %n) {
+; CHECK-LABEL: 'waw_no_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = and i64 %iv, u0xffff0
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ store i64 %iv, ptr %gep
+ store i64 %iv.next, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, %n
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Could be statically known conflict happens, unsafe to vectorize.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask(ptr %p, i64 %stride, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = and i64 %iv, u0xffff0
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, %n
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Same as @waw_no_mask but with run-time strided access, so can be speculated `%stride != 0`.
+define void @waw_no_mask_unknown_stride(ptr %p, i64 %stride, i64 %n) {
+; CHECK-LABEL: 'waw_no_mask_unknown_stride'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ store i64 %iv, ptr %gep
+ store i64 %iv.next, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, %n
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Same as @waw_mask but with run-time strided access, so can be speculated `%stride != 0`.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask_unknown_stride(ptr %p, i64 %stride, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask_unknown_stride'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Safe to vectorize.
+define void @no_cross_iter_dependency(ptr %p, i8 %a, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'no_cross_iter_dependency'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %a.zext = zext i8 %a to i64
+ %stride = add i64 %a.zext, 1 ; known non-zero
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Safe to vectorize.
+define void @const_stride(ptr %p, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'const_stride'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, 5
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @indirect_single_store(ptr noalias %p, i64 %n) {
+; CHECK-LABEL: 'indirect_single_store'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+
+ %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+ %gep = load ptr, ptr %gep.ld
+
+ store i64 %iv, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @indirect_no_mask(ptr noalias %p, i64 %n) {
+; CHECK-LABEL: 'indirect_no_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+
+ %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+ %gep = load ptr, ptr %gep.ld
+
+ store i64 %iv, ptr %gep
+ store i64 %iv.next, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @indirect_mask(ptr noalias %p, i64 %n, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'indirect_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+
+ %gep.ld = getelementptr ptr, ptr %p, i64 %iv
+ %gep = load ptr, ptr %gep.ld
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index fabab210fb850..3ea068440ce22 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -162,35 +162,19 @@ exit:
define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2, ptr %p3, i64 %N) {
; CHECK-LABEL: @store_to_addr_generated_from_invariant_addr(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P0:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1:%.*]], i64 [[IV]]
+; CHECK-NEXT: store ptr [[P0:%.*]], ptr [[ARRAYIDX11]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/187802
More information about the llvm-branch-commits
mailing list