[llvm] 88419a3 - [LICM] Allow load-only scalar promotion in the presence of aliasing loads

Nikita Popov via llvm-commits <llvm-commits@lists.llvm.org>
Tue Dec 20 01:02:54 PST 2022


Author: Nikita Popov
Date: 2022-12-20T10:02:46+01:00
New Revision: 88419a30a02de3a35db990399315b0b3e329627e

URL: https://github.com/llvm/llvm-project/commit/88419a30a02de3a35db990399315b0b3e329627e
DIFF: https://github.com/llvm/llvm-project/commit/88419a30a02de3a35db990399315b0b3e329627e.diff

LOG: [LICM] Allow load-only scalar promotion in the presence of aliasing loads

During scalar promotion, if there are additional potentially-aliasing
loads outside the promoted set, we can still perform a load-only
promotion. As the stores are retained, any potentially-aliasing
loads will still read the correct value.
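
As an illustration, here is a hypothetical C analogue of the
promote-unknown-load.ll test updated below (names invented for the
sketch):

    int test(int *ary, long *pos_ptr, long len) {
      int accum = 0;
      long pos;
      do {
        pos = *pos_ptr;     // load promoted to a loop phi
        *pos_ptr = pos + 1; // store retained: the aliasing load below
                            // must still observe it
        accum += ary[pos];  // potentially-aliasing load
      } while (pos < len);
      return accum;
    }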

This more than doubles the number of load promotions in
llvm-test-suite:

                                |  Old |  New
    licm.NumPromotionCandidates | 4448 | 6038
    licm.NumLoadPromoted        |  479 | 1069
    licm.NumLoadStorePromoted   | 1459 | 1459

Unfortunately, this does have some impact on compile-time:
http://llvm-compile-time-tracker.com/compare.php?from=57f7f0d6cf0706a88e1ecb74f3d3e8891cceabfa&to=72b811738148aab399966a0435f13b695da1c1c8&stat=instructions
In part this is because we now have fewer early bailouts from
promotion, but also due to second-order effects (e.g., for one case
I looked at, we spend more time in SLP now).

Differential Revision: https://reviews.llvm.org/D133192

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/AliasSetTracker.h
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/lib/Analysis/AliasSetTracker.cpp
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
    llvm/test/Transforms/LICM/guards.ll
    llvm/test/Transforms/LICM/invariant.start.ll
    llvm/test/Transforms/LICM/promote-unknown-load.ll
    llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
    llvm/unittests/Analysis/AliasSetTrackerTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
index 23eaa3c9a2acb..e485e1ff2f4c9 100644
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -40,6 +40,7 @@ class AnyMemTransferInst;
 class BasicBlock;
 class BatchAAResults;
 class LoadInst;
+enum class ModRefInfo : uint8_t;
 class raw_ostream;
 class StoreInst;
 class VAArgInst;
@@ -293,7 +294,8 @@ class AliasSet : public ilist_node<AliasSet> {
   /// set return the appropriate AliasResult. Otherwise return NoAlias.
   AliasResult aliasesPointer(const Value *Ptr, LocationSize Size,
                              const AAMDNodes &AAInfo, BatchAAResults &AA) const;
-  bool aliasesUnknownInst(const Instruction *Inst, BatchAAResults &AA) const;
+  ModRefInfo aliasesUnknownInst(const Instruction *Inst,
+                                BatchAAResults &AA) const;
 };
 
 inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {

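The header change above turns the boolean alias query into a ModRefInfo
result, letting callers distinguish reads from writes outside the set.
A minimal caller-side sketch (hypothetical helper name and an assumed
set of includes):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/AliasSetTracker.h"

    // Returns true if promotion of the set must be abandoned entirely;
    // HasReadsOutsideSet signals that only load-only promotion is legal.
    static bool mustDropSet(const llvm::AliasSet &AS,
                            const llvm::Instruction *I,
                            llvm::BatchAAResults &AA,
                            bool &HasReadsOutsideSet) {
      llvm::ModRefInfo MR = AS.aliasesUnknownInst(I, AA);
      if (llvm::isModSet(MR))
        return true;               // written outside the set: no promotion
      if (llvm::isRefSet(MR))
        HasReadsOutsideSet = true; // read outside the set: keep the stores
      return false;
    }
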
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index cded98662389f..d63bee6fa3211 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -211,7 +211,7 @@ bool promoteLoopAccessesToScalars(
     PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
     const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
     MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
-    bool AllowSpeculation);
+    bool AllowSpeculation, bool HasReadsOutsideSet);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.

diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 526642d65957b..1c9ebadf36493 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -225,29 +225,34 @@ AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
   return AliasResult::NoAlias;
 }
 
-bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
-                                  BatchAAResults &AA) const {
+ModRefInfo AliasSet::aliasesUnknownInst(const Instruction *Inst,
+                                        BatchAAResults &AA) const {
 
   if (AliasAny)
-    return true;
+    return ModRefInfo::ModRef;
 
   if (!Inst->mayReadOrWriteMemory())
-    return false;
+    return ModRefInfo::NoModRef;
 
   for (Instruction *UnknownInst : UnknownInsts) {
     const auto *C1 = dyn_cast<CallBase>(UnknownInst);
     const auto *C2 = dyn_cast<CallBase>(Inst);
     if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
-        isModOrRefSet(AA.getModRefInfo(C2, C1)))
-      return true;
+        isModOrRefSet(AA.getModRefInfo(C2, C1))) {
+      // TODO: Could be more precise, but not really useful right now.
+      return ModRefInfo::ModRef;
+    }
   }
 
-  for (iterator I = begin(), E = end(); I != E; ++I)
-    if (isModOrRefSet(AA.getModRefInfo(
-            Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()))))
-      return true;
+  ModRefInfo MR = ModRefInfo::NoModRef;
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    MR |= AA.getModRefInfo(
+        Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()));
+    if (isModAndRefSet(MR))
+      return MR;
+  }
 
-  return false;
+  return MR;
 }
 
 void AliasSetTracker::clear() {
@@ -297,7 +302,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
 AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
   AliasSet *FoundSet = nullptr;
   for (AliasSet &AS : llvm::make_early_inc_range(*this)) {
-    if (AS.Forward || !AS.aliasesUnknownInst(Inst, AA))
+    if (AS.Forward || !isModOrRefSet(AS.aliasesUnknownInst(Inst, AA)))
       continue;
     if (!FoundSet) {
       // If this is the first alias set ptr can go into, remember it.

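aliasesUnknownInst now accumulates ModRefInfo over every pointer in the
set and exits early once both bits are set, since the result can only
grow. A standalone sketch of the lattice behaviour this relies on
(assuming the helpers from llvm/Support/ModRef.h):

    #include "llvm/Support/ModRef.h"
    #include <cassert>

    void modRefLatticeDemo() {
      using llvm::ModRefInfo;
      // ModRefInfo is a two-bit lattice: NoModRef < {Ref, Mod} < ModRef,
      // so |= only moves the value upward.
      ModRefInfo MR = ModRefInfo::NoModRef;
      MR |= ModRefInfo::Ref;             // some pointer in the set is read
      assert(!llvm::isModAndRefSet(MR)); // the result can still grow
      MR |= ModRefInfo::Mod;             // another pointer is written
      assert(llvm::isModAndRefSet(MR));  // saturated at ModRef: stop scanning
    }
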
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d3739f31bc57e..f7d36a49d6d25 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -179,7 +179,9 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
 
 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                 function_ref<void(Instruction *)> Fn);
-static SmallVector<SmallSetVector<Value *, 8>, 0>
+using PointersAndHasReadsOutsideSet =
+    std::pair<SmallSetVector<Value *, 8>, bool>;
+static SmallVector<PointersAndHasReadsOutsideSet, 0>
 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
 
 namespace {
@@ -489,12 +491,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
       bool LocalPromoted;
       do {
         LocalPromoted = false;
-        for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+        for (auto [PointerMustAliases, HasReadsOutsideSet] :
              collectPromotionCandidates(MSSA, AA, L)) {
           LocalPromoted |= promoteLoopAccessesToScalars(
               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
               DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
-              LicmAllowSpeculation);
+              LicmAllowSpeculation, HasReadsOutsideSet);
         }
         Promoted |= LocalPromoted;
       } while (LocalPromoted);
@@ -1953,7 +1955,8 @@ bool llvm::promoteLoopAccessesToScalars(
     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
     const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
     MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
-    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
+    OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
+    bool HasReadsOutsideSet) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          SafetyInfo != nullptr &&
@@ -2028,7 +2031,12 @@ bool llvm::promoteLoopAccessesToScalars(
 
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
-  if (SafetyInfo->anyBlockMayThrow()) {
+  // If there are reads outside the promoted set, then promoting stores is
+  // definitely not safe.
+  if (HasReadsOutsideSet)
+    StoreSafety = StoreUnsafe;
+
+  if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
     // If a loop can throw, we have to insert a store along each unwind edge.
     // That said, we can't actually make the unwind edge explicit. Therefore,
     // we have to prove that the store is dead along the unwind edge.  We do
@@ -2253,7 +2261,9 @@ static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
           Fn(MUD->getMemoryInst());
 }
 
-static SmallVector<SmallSetVector<Value *, 8>, 0>
+// The bool indicates whether there might be reads outside the set, in which
+// case only loads may be promoted.
+static SmallVector<PointersAndHasReadsOutsideSet, 0>
 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   BatchAAResults BatchAA(*AA);
   AliasSetTracker AST(BatchAA);
@@ -2276,10 +2286,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   });
 
   // We're only interested in must-alias sets that contain a mod.
-  SmallVector<const AliasSet *, 8> Sets;
+  SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
   for (AliasSet &AS : AST)
     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
-      Sets.push_back(&AS);
+      Sets.push_back({&AS, false});
 
   if (Sets.empty())
     return {}; // Nothing to promote...
@@ -2289,17 +2299,28 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
     if (AttemptingPromotion.contains(I))
       return;
 
-    llvm::erase_if(Sets, [&](const AliasSet *AS) {
-      return AS->aliasesUnknownInst(I, BatchAA);
+    llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
+      ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
+      // Cannot promote if there are writes outside the set.
+      if (isModSet(MR))
+        return true;
+      if (isRefSet(MR)) {
+        // Remember reads outside the set.
+        Pair.setInt(true);
+        // If this is a mod-only set and there are reads outside the set,
+        // we will not be able to promote, so bail out early.
+        return !Pair.getPointer()->isRef();
+      }
+      return false;
     });
   });
 
-  SmallVector<SmallSetVector<Value *, 8>, 0> Result;
-  for (const AliasSet *Set : Sets) {
+  SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
+  for (auto [Set, HasReadsOutsideSet] : Sets) {
     SmallSetVector<Value *, 8> PointerMustAliases;
     for (const auto &ASI : *Set)
       PointerMustAliases.insert(ASI.getValue());
-    Result.push_back(std::move(PointerMustAliases));
+    Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
   }
 
   return Result;

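In collectPromotionCandidates, the per-set "has reads outside the set"
flag is packed into the low alignment bit of the AliasSet pointer via
PointerIntPair, so the candidate vector elements stay pointer-sized.
A small standalone sketch (assuming llvm/ADT/PointerIntPair.h):

    #include "llvm/ADT/PointerIntPair.h"

    void pointerIntPairDemo(const int *P) {
      // The bool lives in the pointer's low bit, freed up by alignment.
      llvm::PointerIntPair<const int *, 1, bool> Pair(P, false);
      static_assert(sizeof(Pair) == sizeof(void *), "pointer-sized");
      Pair.setInt(true);                   // mark a read outside the set
      bool HasReads = Pair.getInt();       // true
      const int *Orig = Pair.getPointer(); // original pointer, unchanged
      (void)HasReads;
      (void)Orig;
    }
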
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 08fb010285a1d..626188805e374 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1329,7 +1329,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
       // can't reroll.
       if (RootInst->mayReadFromMemory()) {
         for (auto &K : AST) {
-          if (K.aliasesUnknownInst(RootInst, BatchAA)) {
+          if (isModOrRefSet(K.aliasesUnknownInst(RootInst, BatchAA))) {
             LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
                               << *BaseInst << " vs. " << *RootInst
                               << " (depends on future store)\n");

diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll
index 231e770208d9b..fcfe299d656e6 100644
--- a/llvm/test/Transforms/LICM/guards.ll
+++ b/llvm/test/Transforms/LICM/guards.ll
@@ -27,7 +27,7 @@ loop:
   br label %loop
 }
 
-; Can't hoist over a side effect
+; Can't hoist over a side effect, but can still promote and fold the load.
 define void @test2(i1 %cond, ptr %ptr) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
@@ -36,8 +36,7 @@ define void @test2(i1 %cond, ptr %ptr) {
 ; CHECK-NEXT:    [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i32 0, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], 0
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 

diff --git a/llvm/test/Transforms/LICM/invariant.start.ll b/llvm/test/Transforms/LICM/invariant.start.ll
index aba38829e0028..1ba646b8858ad 100644
--- a/llvm/test/Transforms/LICM/invariant.start.ll
+++ b/llvm/test/Transforms/LICM/invariant.start.ll
@@ -87,8 +87,7 @@ define void @test4(i1 %cond, ptr %ptr) {
 ; CHECK-NEXT:    [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i32 0, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[PTR]])
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], 0
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:

diff --git a/llvm/test/Transforms/LICM/promote-unknown-load.ll b/llvm/test/Transforms/LICM/promote-unknown-load.ll
index 4cddfdcbafd16..4105df215409f 100644
--- a/llvm/test/Transforms/LICM/promote-unknown-load.ll
+++ b/llvm/test/Transforms/LICM/promote-unknown-load.ll
@@ -1,22 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=licm < %s | FileCheck %s
 
-; FIXME: The %val.ptr load might alias the %pos.ptr load/stores, but it's still
+; The %val.ptr load might alias the %pos.ptr load/stores, but it's still
 ; fine to promote the load as long as the store is retained.
 define i32 @test(ptr %ary, i64 %len) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[POS_PTR:%.*]] = getelementptr i8, ptr [[ARY:%.*]], i64 32
+; CHECK-NEXT:    [[POS_PTR_PROMOTED:%.*]] = load i64, ptr [[POS_PTR]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[POS:%.*]] = load i64, ptr [[POS_PTR]], align 4
-; CHECK-NEXT:    [[POS_NEXT:%.*]] = add i64 [[POS]], 1
+; CHECK-NEXT:    [[POS_NEXT1:%.*]] = phi i64 [ [[POS_PTR_PROMOTED]], [[ENTRY:%.*]] ], [ [[POS_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[POS_NEXT]] = add i64 [[POS_NEXT1]], 1
 ; CHECK-NEXT:    store i64 [[POS_NEXT]], ptr [[POS_PTR]], align 4
-; CHECK-NEXT:    [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS]]
+; CHECK-NEXT:    [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS_NEXT1]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[VAL_PTR]], align 4
 ; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL]]
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[POS]], [[LEN:%.*]]
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ult i64 [[POS_NEXT1]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP]] ]

diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 0d82bea4c1fae..2dab5f29d6598 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -333,7 +333,6 @@ for.end:                                          ; preds = %for.body
 }
 
 ; Multiple variant stores to the same uniform address
-; We do not vectorize such loops currently.
 ;  for(; i < itr; i++) {
 ;    for(; j < itr; j++) {
 ;      var1[i] = var2[j] + var1[i];
@@ -347,28 +346,80 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader.preheader:
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[VAR2:%.*]], i64 4
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
 ; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV23]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i64 [[TMP0]], 4
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
 ; CHECK:       for.body3.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[J_022]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[ITR]]
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 3
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[VAR2]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i32 [[J_022]], -1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[ITR]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 2
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3]], i64 [[TMP12]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[UGLYGEP]], [[UGLYGEP4]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP6]], -4
+; CHECK-NEXT:    [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4, !alias.scope !22
+; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP16]] = add <4 x i32> [[TMP15]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP16]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
+; CHECK-NEXT:    store i32 [[TMP18]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP2]], [[FOR_BODY3_LR_PH]] ], [ [[TMP2]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[ADD]], 1
-; CHECK-NEXT:    store i32 [[TMP3]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21]] = add nsw i32 [[ADD]], 1
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[FOR_BODY3]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK:       for.inc8.loopexit.loopexit:
+; CHECK-NEXT:    br label [[FOR_INC8_LOOPEXIT]]
 ; CHECK:       for.inc8.loopexit:
 ; CHECK-NEXT:    br label [[FOR_INC8]]
 ; CHECK:       for.inc8:
@@ -440,21 +491,22 @@ define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocaptu
 ; CHECK:       for.body3.lr.ph:
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[J_022]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[ADD]], 42
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[COND_STORE:%.*]], label [[LATCH]]
 ; CHECK:       cond_store:
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[ADD]], 1
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
-; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ]
-; CHECK-NEXT:    store i32 [[STOREMERGE]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP5]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]]

diff --git a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
index d2ee4db262da5..bdc37d88bd69b 100644
--- a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
+++ b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp
@@ -83,7 +83,7 @@ TEST(AliasSetTracker, AliasUnknownInst) {
     for (AliasSet &AS : AST) {
       if (!Inst.mayReadOrWriteMemory())
         continue;
-      if (!AS.aliasesUnknownInst(&Inst, BatchAA))
+      if (!isModOrRefSet(AS.aliasesUnknownInst(&Inst, BatchAA)))
         continue;
       ASSERT_NE(FoundAS, true);
       FoundAS = true;

