[llvm] 88419a3 - [LICM] Allow load-only scalar promotion in the presence of aliasing loads
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 20 01:02:54 PST 2022
Author: Nikita Popov
Date: 2022-12-20T10:02:46+01:00
New Revision: 88419a30a02de3a35db990399315b0b3e329627e
URL: https://github.com/llvm/llvm-project/commit/88419a30a02de3a35db990399315b0b3e329627e
DIFF: https://github.com/llvm/llvm-project/commit/88419a30a02de3a35db990399315b0b3e329627e.diff
LOG: [LICM] Allow load-only scalar promotion in the presence of aliasing loads
During scalar promotion, if there are additional potentially-aliasing
loads outside the promoted set, we can still perform a load-only
promotion. As the stores are retained, any potentially-aliasing
loads will still read the correct value.
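To make the effect concrete, here is a rough C-level sketch (illustrative
only, not taken from the patch; it mirrors the promote-unknown-load.ll test
updated below) of a loop that now benefits:

  /* *pos may alias ary[...], so the store to *pos has to stay in the loop,
     but the repeated loads of *pos can still be promoted to a register,
     because every potentially-aliasing read sees the retained store. */
  static int sum_from(int *ary, long *pos, long len) {
    int accum = 0;
    while (*pos < len) {    /* load of *pos: now promotable to a PHI */
      long p = *pos;
      *pos = p + 1;         /* store kept inside the loop */
      accum += ary[p];      /* may alias *pos; previously this read blocked
                               promotion of the *pos accesses entirely */
    }
    return accum;
  }

Only the loads of *pos are rewritten to use a register; the store remains,
so the possibly-aliasing ary[p] load keeps reading the up-to-date value.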
This more than doubles the number of load promotions in llvm-test-suite:
                            | Old  | New
licm.NumPromotionCandidates | 4448 | 6038
licm.NumLoadPromoted        |  479 | 1069
licm.NumLoadStorePromoted   | 1459 | 1459
Unfortunately, this does have some impact on compile-time:
http://llvm-compile-time-tracker.com/compare.php?from=57f7f0d6cf0706a88e1ecb74f3d3e8891cceabfa&to=72b811738148aab399966a0435f13b695da1c1c8&stat=instructions
In part this is because we now have fewer early bailouts from
promotion, but also because of second-order effects (e.g. for one
case I looked at, we now spend more time in SLP).
Differential Revision: https://reviews.llvm.org/D133192
Added:
Modified:
llvm/include/llvm/Analysis/AliasSetTracker.h
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Analysis/AliasSetTracker.cpp
llvm/lib/Transforms/Scalar/LICM.cpp
llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
llvm/test/Transforms/LICM/guards.ll
llvm/test/Transforms/LICM/invariant.start.ll
llvm/test/Transforms/LICM/promote-unknown-load.ll
llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
llvm/unittests/Analysis/AliasSetTrackerTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
index 23eaa3c9a2acb..e485e1ff2f4c9 100644
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -40,6 +40,7 @@ class AnyMemTransferInst;
class BasicBlock;
class BatchAAResults;
class LoadInst;
+enum class ModRefInfo : uint8_t;
class raw_ostream;
class StoreInst;
class VAArgInst;
@@ -293,7 +294,8 @@ class AliasSet : public ilist_node<AliasSet> {
/// set return the appropriate AliasResult. Otherwise return NoAlias.
AliasResult aliasesPointer(const Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo, BatchAAResults &AA) const;
- bool aliasesUnknownInst(const Instruction *Inst, BatchAAResults &AA) const;
+ ModRefInfo aliasesUnknownInst(const Instruction *Inst,
+ BatchAAResults &AA) const;
};
inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index cded98662389f..d63bee6fa3211 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -211,7 +211,7 @@ bool promoteLoopAccessesToScalars(
PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
- bool AllowSpeculation);
+ bool AllowSpeculation, bool HasReadsOutsideSet);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 526642d65957b..1c9ebadf36493 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -225,29 +225,34 @@ AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
return AliasResult::NoAlias;
}
-bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
- BatchAAResults &AA) const {
+ModRefInfo AliasSet::aliasesUnknownInst(const Instruction *Inst,
+ BatchAAResults &AA) const {
if (AliasAny)
- return true;
+ return ModRefInfo::ModRef;
if (!Inst->mayReadOrWriteMemory())
- return false;
+ return ModRefInfo::NoModRef;
for (Instruction *UnknownInst : UnknownInsts) {
const auto *C1 = dyn_cast<CallBase>(UnknownInst);
const auto *C2 = dyn_cast<CallBase>(Inst);
if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
- isModOrRefSet(AA.getModRefInfo(C2, C1)))
- return true;
+ isModOrRefSet(AA.getModRefInfo(C2, C1))) {
+ // TODO: Could be more precise, but not really useful right now.
+ return ModRefInfo::ModRef;
+ }
}
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (isModOrRefSet(AA.getModRefInfo(
- Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()))))
- return true;
+ ModRefInfo MR = ModRefInfo::NoModRef;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ MR |= AA.getModRefInfo(
+ Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()));
+ if (isModAndRefSet(MR))
+ return MR;
+ }
- return false;
+ return MR;
}
void AliasSetTracker::clear() {
@@ -297,7 +302,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
for (AliasSet &AS : llvm::make_early_inc_range(*this)) {
- if (AS.Forward || !AS.aliasesUnknownInst(Inst, AA))
+ if (AS.Forward || !isModOrRefSet(AS.aliasesUnknownInst(Inst, AA)))
continue;
if (!FoundSet) {
// If this is the first alias set ptr can go into, remember it.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d3739f31bc57e..f7d36a49d6d25 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -179,7 +179,9 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
-static SmallVector<SmallSetVector<Value *, 8>, 0>
+using PointersAndHasReadsOutsideSet =
+ std::pair<SmallSetVector<Value *, 8>, bool>;
+static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
namespace {
@@ -489,12 +491,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
bool LocalPromoted;
do {
LocalPromoted = false;
- for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+ for (auto [PointerMustAliases, HasReadsOutsideSet] :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
- LicmAllowSpeculation);
+ LicmAllowSpeculation, HasReadsOutsideSet);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@@ -1953,7 +1955,8 @@ bool llvm::promoteLoopAccessesToScalars(
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
+ bool HasReadsOutsideSet) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2028,7 +2031,12 @@ bool llvm::promoteLoopAccessesToScalars(
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
- if (SafetyInfo->anyBlockMayThrow()) {
+ // If there are reads outside the promoted set, then promoting stores is
+ // definitely not safe.
+ if (HasReadsOutsideSet)
+ StoreSafety = StoreUnsafe;
+
+ if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge. We do
@@ -2253,7 +2261,9 @@ static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
Fn(MUD->getMemoryInst());
}
-static SmallVector<SmallSetVector<Value *, 8>, 0>
+// The bool indicates whether there might be reads outside the set, in which
+// case only loads may be promoted.
+static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
BatchAAResults BatchAA(*AA);
AliasSetTracker AST(BatchAA);
@@ -2276,10 +2286,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
});
// We're only interested in must-alias sets that contain a mod.
- SmallVector<const AliasSet *, 8> Sets;
+ SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
for (AliasSet &AS : AST)
if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
- Sets.push_back(&AS);
+ Sets.push_back({&AS, false});
if (Sets.empty())
return {}; // Nothing to promote...
@@ -2289,17 +2299,28 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
if (AttemptingPromotion.contains(I))
return;
- llvm::erase_if(Sets, [&](const AliasSet *AS) {
- return AS->aliasesUnknownInst(I, BatchAA);
+ llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
+ ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
+ // Cannot promote if there are writes outside the set.
+ if (isModSet(MR))
+ return true;
+ if (isRefSet(MR)) {
+ // Remember reads outside the set.
+ Pair.setInt(true);
+ // If this is a mod-only set and there are reads outside the set,
+ // we will not be able to promote, so bail out early.
+ return !Pair.getPointer()->isRef();
+ }
+ return false;
});
});
- SmallVector<SmallSetVector<Value *, 8>, 0> Result;
- for (const AliasSet *Set : Sets) {
+ SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
+ for (auto [Set, HasReadsOutsideSet] : Sets) {
SmallSetVector<Value *, 8> PointerMustAliases;
for (const auto &ASI : *Set)
PointerMustAliases.insert(ASI.getValue());
- Result.push_back(std::move(PointerMustAliases));
+ Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
}
return Result;
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 08fb010285a1d..626188805e374 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1329,7 +1329,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// can't reroll.
if (RootInst->mayReadFromMemory()) {
for (auto &K : AST) {
- if (K.aliasesUnknownInst(RootInst, BatchAA)) {
+ if (isModOrRefSet(K.aliasesUnknownInst(RootInst, BatchAA))) {
LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
<< *BaseInst << " vs. " << *RootInst
<< " (depends on future store)\n");
diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll
index 231e770208d9b..fcfe299d656e6 100644
--- a/llvm/test/Transforms/LICM/guards.ll
+++ b/llvm/test/Transforms/LICM/guards.ll
@@ -27,7 +27,7 @@ loop:
br label %loop
}
-; Can't hoist over a side effect
+; Can't hoist over a side effect, but can still promote and fold the load.
define void @test2(i1 %cond, ptr %ptr) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
@@ -36,8 +36,7 @@ define void @test2(i1 %cond, ptr %ptr) {
; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT: [[X_INC]] = add i32 [[X]], 0
; CHECK-NEXT: br label [[LOOP]]
;
diff --git a/llvm/test/Transforms/LICM/invariant.start.ll b/llvm/test/Transforms/LICM/invariant.start.ll
index aba38829e0028..1ba646b8858ad 100644
--- a/llvm/test/Transforms/LICM/invariant.start.ll
+++ b/llvm/test/Transforms/LICM/invariant.start.ll
@@ -87,8 +87,7 @@ define void @test4(i1 %cond, ptr %ptr) {
; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[PTR]])
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT: [[X_INC]] = add i32 [[X]], 0
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/LICM/promote-unknown-load.ll b/llvm/test/Transforms/LICM/promote-unknown-load.ll
index 4cddfdcbafd16..4105df215409f 100644
--- a/llvm/test/Transforms/LICM/promote-unknown-load.ll
+++ b/llvm/test/Transforms/LICM/promote-unknown-load.ll
@@ -1,22 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=licm < %s | FileCheck %s
-; FIXME: The %val.ptr load might alias the %pos.ptr load/stores, but it's still
+; The %val.ptr load might alias the %pos.ptr load/stores, but it's still
; fine to promote the load as long as the store is retained.
define i32 @test(ptr %ary, i64 %len) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[POS_PTR:%.*]] = getelementptr i8, ptr [[ARY:%.*]], i64 32
+; CHECK-NEXT: [[POS_PTR_PROMOTED:%.*]] = load i64, ptr [[POS_PTR]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[POS:%.*]] = load i64, ptr [[POS_PTR]], align 4
-; CHECK-NEXT: [[POS_NEXT:%.*]] = add i64 [[POS]], 1
+; CHECK-NEXT: [[POS_NEXT1:%.*]] = phi i64 [ [[POS_PTR_PROMOTED]], [[ENTRY:%.*]] ], [ [[POS_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[POS_NEXT]] = add i64 [[POS_NEXT1]], 1
; CHECK-NEXT: store i64 [[POS_NEXT]], ptr [[POS_PTR]], align 4
-; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS]]
+; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS_NEXT1]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[VAL_PTR]], align 4
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL]]
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[POS]], [[LEN:%.*]]
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[POS_NEXT1]], [[LEN:%.*]]
; CHECK-NEXT: br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 0d82bea4c1fae..2dab5f29d6598 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -333,7 +333,6 @@ for.end: ; preds = %for.body
}
; Multiple variant stores to the same uniform address
-; We do not vectorize such loops currently.
; for(; i < itr; i++) {
; for(; j < itr; j++) {
; var1[i] = var2[j] + var1[i];
@@ -347,28 +346,80 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0
; CHECK-NEXT: br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
; CHECK: for.cond1.preheader.preheader:
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[VAR2:%.*]], i64 4
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
; CHECK-NEXT: [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV23]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP1]]
; CHECK-NEXT: [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
; CHECK-NEXT: br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
; CHECK: for.body3.lr.ph:
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT: [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[J_022]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[ITR]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 3
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[VAR2]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[J_022]], -1
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[ITR]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 2
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3]], i64 [[TMP12]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UGLYGEP]], [[UGLYGEP4]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP6]], -4
+; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4, !alias.scope !22
+; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[TMP15]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP16]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP2]], [[FOR_BODY3_LR_PH]] ], [ [[TMP2]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
; CHECK: for.body3:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[ADD]], 1
-; CHECK-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP21]] = add nsw i32 [[ADD]], 1
+; CHECK-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[FOR_BODY3]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK: for.inc8.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT]]
; CHECK: for.inc8.loopexit:
; CHECK-NEXT: br label [[FOR_INC8]]
; CHECK: for.inc8:
@@ -440,21 +491,22 @@ define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocaptu
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT: [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
; CHECK: for.body3:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[ADD]], 42
; CHECK-NEXT: br i1 [[TMP3]], label [[COND_STORE:%.*]], label [[LATCH]]
; CHECK: cond_store:
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[ADD]], 1
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
-; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ]
-; CHECK-NEXT: store i32 [[STOREMERGE]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[TMP5]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ]
+; CHECK-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]]
diff --git a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
index d2ee4db262da5..bdc37d88bd69b 100644
--- a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
+++ b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
@@ -83,7 +83,7 @@ TEST(AliasSetTracker, AliasUnknownInst) {
for (AliasSet &AS : AST) {
if (!Inst.mayReadOrWriteMemory())
continue;
- if (!AS.aliasesUnknownInst(&Inst, BatchAA))
+ if (!isModOrRefSet(AS.aliasesUnknownInst(&Inst, BatchAA)))
continue;
ASSERT_NE(FoundAS, true);
FoundAS = true;