[llvm] [LAA] Be more precise on different store sizes (PR #122318)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 05:56:58 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/122318
>From ac63260486f5e5dc78ab43bca7316b5c6a3cb4b6 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 9 Jan 2025 16:50:04 +0000
Subject: [PATCH] [LAA] Be more precise on different store sizes
The HasSameSize checks, which are triggered on different store sizes, in
MemDepChecker::isDependent are ad-hoc and imprecise, leading to spurious
dependencies and runtime-checks. Identify that the exact scenario in
which to bail out is unequal store sizes when dependence distance is
possibly zero, and check precisely this condition in
MemDepChecker::getDependenceDistanceAndSize, eliminating all the ad-hoc
checks in isDependent and making LoopAccessAnalysis more precise.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 13 ++---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 51 +++++++++----------
.../forward-loop-carried.ll | 4 --
...endence-distance-different-access-sizes.ll | 17 ++-----
...interleave-allocsize-not-equal-typesize.ll | 40 ++++++++++++---
.../interleaved-accesses-use-after-free.ll | 23 ++-------
.../reuse-lcssa-phi-scev-expansion.ll | 41 ++-------------
7 files changed, 73 insertions(+), 116 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 1415da14a3494..e30150f29a3d3 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -396,8 +396,7 @@ class MemoryDepChecker {
uint64_t MaxStride;
std::optional<uint64_t> CommonStride;
- /// TypeByteSize is either the common store size of both accesses, or 0 when
- /// store sizes mismatch.
+ /// TypeByteSize is the store size of the sink.
uint64_t TypeByteSize;
bool AIsWrite;
@@ -412,13 +411,9 @@ class MemoryDepChecker {
};
/// Get the dependence distance, strides, type size and whether it is a write
- /// for the dependence between A and B. Returns a DepType, if we can prove
- /// there's no dependence or the analysis fails. Outlined to lambda to limit
- /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and
- /// others. Returns either the dependence result, if it could already be
- /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that
- /// TypeByteSize could be 0 when store sizes mismatch, and this should be
- /// checked in the caller.
+ /// for the dependence between A and B. Returns either a DepType, the
+ /// dependence result, if it could already be determined, or a
+ /// DepDistanceStrideAndSizeInfo struct.
std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
const MemAccessInfo &B,
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b6dc5c487475e..67eda1cd4115c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2070,7 +2070,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// it in the caller isDependent.
uint64_t ASz = DL.getTypeAllocSize(ATy);
uint64_t BSz = DL.getTypeAllocSize(BTy);
- uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
+
+ // The TypeByteSize is used to scale Distance and VF. In these contexts, the
+ // only size that matters is the size of the Sink.
+ uint64_t TypeByteSize = BSz;
uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
@@ -2081,12 +2084,27 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
if (StrideAScaled == StrideBScaled)
CommonStride = StrideAScaled;
+ // If the distance is a SCEVCouldNotCompute, fail immediately.
+ if (isa<SCEVCouldNotCompute>(Dist)) {
+ LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
+ return Dependence::Unknown;
+ }
+
// TODO: FoundNonConstantDistanceDependence is used as a necessary condition
// to consider retrying with runtime checks. Historically, we did not set it
// when (unscaled) strides were different but there is no inherent reason to.
if (!isa<SCEVConstant>(Dist))
FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
+ // When the distance is possibly zero, we're reading/writing the same memory
+ // location: if the store sizes are not equal, fail with an unknown
+ // dependence.
+ if (!SE.isKnownNonZero(Dist) && AStoreSz != BStoreSz) {
+ LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence distance with "
+ "different type sizes\n");
+ return Dependence::Unknown;
+ }
+
return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
TypeByteSize, AIsWrite, BIsWrite);
}
@@ -2105,12 +2123,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
std::get<DepDistanceStrideAndSizeInfo>(Res);
- bool HasSameSize = TypeByteSize > 0;
-
- if (isa<SCEVCouldNotCompute>(Dist)) {
- LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
- return Dependence::Unknown;
- }
ScalarEvolution &SE = *PSE.getSE();
auto &DL = InnermostLoop->getHeader()->getDataLayout();
@@ -2120,8 +2132,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// upper bound of the number of iterations), the accesses are independet, i.e.
// they are far enough appart that accesses won't access the same location
// across all loop ierations.
- if (HasSameSize &&
- isSafeDependenceDistance(
+ if (isSafeDependenceDistance(
DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
return Dependence::NoDep;
@@ -2135,7 +2146,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (APDist) {
// If the distance between accesses and their strides are known constants,
// check whether the accesses interlace each other.
- if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
+ if (ConstDist > 0 && CommonStride && CommonStride > 1 &&
areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
@@ -2149,15 +2160,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Negative distances are not plausible dependencies.
if (SE.isKnownNonPositive(Dist)) {
- if (SE.isKnownNonNegative(Dist)) {
- if (HasSameSize) {
- // Write to the same location with the same size.
- return Dependence::Forward;
- }
- LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
- "different type sizes\n");
- return Dependence::Unknown;
- }
+ if (SE.isKnownNonNegative(Dist))
+ // Write to the same location with the same size.
+ return Dependence::Forward;
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
// Check if the first access writes to a location that is read in a later
@@ -2171,8 +2176,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
if (!ConstDist)
return Dependence::Unknown;
- if (!HasSameSize ||
- couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
+ if (couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
LLVM_DEBUG(
dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
@@ -2188,11 +2192,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (MinDistance <= 0)
return Dependence::Unknown;
- if (!HasSameSize) {
- LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
- "different type sizes\n");
- return Dependence::Unknown;
- }
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
VectorizerParams::VectorizationFactor : 1);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
index adfd19923e921..7837c20f003e2 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -70,10 +70,6 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
; CHECK-NEXT: %l = load i24, ptr %gep.1, align 1
; CHECK-EMPTY:
-; CHECK-NEXT: Forward:
-; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
-; CHECK-NEXT: store i24 %l, ptr %ptr.iv, align 1
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
index 1a6e25859f085..c966bc4e7d028 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
@@ -8,21 +8,14 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define void @test_distance_positive_independent_via_trip_count(ptr %A) {
; CHECK-LABEL: 'test_distance_positive_independent_via_trip_count'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 3200 bits
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: %l = load i8, ptr %gep.A, align 1 ->
+; CHECK-NEXT: store i32 %ext, ptr %gep.A.400, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group GRP0:
-; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv
-; CHECK-NEXT: Against group GRP1:
-; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: (400 + %A)<nuw> High: (804 + %A))
-; CHECK-NEXT: Member: {(400 + %A)<nuw>,+,4}<nuw><%loop>
-; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (101 + %A))
-; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
index 79d7ab84b3a0f..c48ae69093e99 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -35,10 +35,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope !0
-; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope !0
-; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope !0
-; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope !0
+; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
@@ -47,7 +47,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
-; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -96,17 +96,41 @@ exit:
define void @pr58722_store_interleave_group(ptr %src, ptr %dst) {
; CHECK-LABEL: @pr58722_store_interleave_group(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP1]]
+; CHECK-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP2]], align 4
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[OFFSET_IDX]] to i24
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP1]] to i24
+; CHECK-NEXT: store i24 [[TMP6]], ptr [[TMP4]], align 4
+; CHECK-NEXT: store i24 [[TMP7]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 5000
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_IV]], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1
; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24
; CHECK-NEXT: store i24 [[TRUNC_IV]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 10000
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
index d5239d5a4e33d..e89793c80fd65 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -debug-only=loop-accesses -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s -check-prefix=LOOP-ACCESS
-; RUN: opt -passes=loop-vectorize -debug-only=vectorutils -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s -check-prefix=LV
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-redhat-linux-gnu"
@@ -24,27 +24,10 @@ target triple = "x86_64-redhat-linux-gnu"
%struct.baz = type { i64, %struct.pluto }
; LOOP-ACCESS: Too many dependences, stopped recording
-
-; If no dependences are recorded because there are too many, LoopAccessAnalysis
-; just conservatively returns true for any pair of instructions compared (even
-; those belonging to the same store group). This tests make sure that we do not
-; incorrectly release a store group which had no dependences between its
-; members, even if we have no dependences recorded because there are too many.
-
-; CHECK: LV: Creating an interleave group with: store ptr null, ptr %phi5, align 8
-; CHECK: LV: Inserted: store ptr %load12, ptr %getelementptr11, align 8
-; CHECK: into the interleave group with store ptr null, ptr %phi5
-; CHECK: LV: Inserted: store ptr %load7, ptr %getelementptr, align 8
-; CHECK: into the interleave group with store ptr null, ptr %phi5
-
-; CHECK: LV: Creating an interleave group with: store ptr null, ptr %getelementptr13, align 8
-; CHECK: LV: Inserted: store ptr null, ptr %phi6, align 8
-; CHECK: into the interleave group with store ptr null, ptr %getelementptr13
-; CHECK: LV: Invalidated store group due to dependence between store ptr %load7, ptr %getelementptr, align 8 and store ptr null, ptr %getelementptr13, align 8
-; CHECK-NOT: LV: Invalidated store group due to dependence between
+; LV: Can't vectorize due to memory conflicts
; Note: The (only) invalidated store group is the one containing A (store ptr %load7, ptr %getelementptr, align 8) which is:
-; Group with instructions:
+; Group with instructions:
; store ptr null, ptr %phi5, align 8
; store ptr %load7, ptr %getelementptr, align 8
; store ptr %load12, ptr %getelementptr11, align 8
diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
index 2747895f06a7b..18a62e88bb316 100644
--- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
@@ -106,55 +106,22 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
; CHECK-LABEL: define void @runtime_checks_ptr_inductions(
; CHECK-SAME: ptr [[DST_1:%.*]], ptr [[DST_2:%.*]], i1 [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[DST_11:%.*]] = ptrtoint ptr [[DST_1]] to i64
; CHECK-NEXT: br label %[[LOOP_1:.*]]
; CHECK: [[LOOP_1]]:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], %[[LOOP_1]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[DST_1]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[CALL:%.*]] = call i32 @val()
; CHECK-NEXT: [[SEL_DST:%.*]] = select i1 [[C]], ptr [[DST_1]], ptr [[DST_2]]
; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EC_1]], label %[[LOOP_2_HEADER_PREHEADER:.*]], label %[[LOOP_1]]
; CHECK: [[LOOP_2_HEADER_PREHEADER]]:
-; CHECK-NEXT: [[SEL_DST_LCSSA2:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], %[[LOOP_1]] ]
; CHECK-NEXT: [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1]] ]
; CHECK-NEXT: [[SEL_DST_LCSSA:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[SEL_DST_LCSSA23:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA2]] to i64
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVAR_LCSSA]], [[DST_11]]
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA23]]
-; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 1022
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 1022
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 [[INDEX]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <2 x i8> [[WIDE_LOAD]], ptr [[TMP5]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1023, %[[MIDDLE_BLOCK]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ], [ 1, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]]
; CHECK: [[LOOP_2_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ]
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[IV]], 1024
; CHECK-NEXT: br i1 [[EC_2]], label %[[EXIT:.*]], label %[[LOOP_2_LATCH]]
; CHECK: [[LOOP_2_LATCH]]:
@@ -163,7 +130,7 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV_3]], align 1
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1
; CHECK-NEXT: store i8 [[L]], ptr [[PTR_IV_2]], align 1
-; CHECK-NEXT: br label %[[LOOP_2_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br label %[[LOOP_2_HEADER]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list