[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79175 (PR #80274)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Feb 1 03:05:37 PST 2024


https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/80274

resolves llvm/llvm-project#79175

>From ee8c0bd8ff69caca659662961e2b5dfaee907a7a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 24 Jan 2024 12:33:57 +0100
Subject: [PATCH 1/3] [Loads] Use BatchAAResults for available value APIs
 (NFCI)

This allows caching AA queries both within and across the calls,
and enables us to use a custom AAQI configuration.

(cherry picked from commit 89dae798cc77789a43e9a60173f647dae03a65fe)
---
 llvm/include/llvm/Analysis/Loads.h                   | 12 ++++++------
 llvm/lib/Analysis/Lint.cpp                           |  3 ++-
 llvm/lib/Analysis/Loads.cpp                          |  9 ++++-----
 .../InstCombine/InstCombineLoadStoreAlloca.cpp       |  3 ++-
 llvm/lib/Transforms/Scalar/JumpThreading.cpp         | 11 ++++++-----
 5 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 2880ed33a34cb..0926093bba99d 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@
 
 namespace llvm {
 
-class AAResults;
+class BatchAAResults;
 class AssumptionCache;
 class DataLayout;
 class DominatorTree;
@@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
 /// location in memory, as opposed to the value operand of a store.
 ///
 /// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(LoadInst *Load,
-                                BasicBlock *ScanBB,
+Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan,
-                                AAResults *AA = nullptr,
+                                BatchAAResults *AA = nullptr,
                                 bool *IsLoadCSE = nullptr,
                                 unsigned *NumScanedInst = nullptr);
 
@@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
 /// FindAvailableLoadedValue() for the case where we are not interested in
 /// finding the closest clobbering instruction if no available load is found.
 /// This overload cannot be used to scan across multiple blocks.
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
+                                bool *IsLoadCSE,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan);
 
 /// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
 Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
                                  bool AtLeastAtomic, BasicBlock *ScanBB,
                                  BasicBlock::iterator &ScanFrom,
-                                 unsigned MaxInstsToScan, AAResults *AA,
+                                 unsigned MaxInstsToScan, BatchAAResults *AA,
                                  bool *IsLoadCSE, unsigned *NumScanedInst);
 
 /// Returns true if a pointer value \p A can be replace with another pointer
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 1ebc593016bc0..16635097d20af 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
     BasicBlock::iterator BBI = L->getIterator();
     BasicBlock *BB = L->getParent();
     SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+    BatchAAResults BatchAA(*AA);
     for (;;) {
       if (!VisitedBlocks.insert(BB).second)
         break;
       if (Value *U =
-              FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
+              FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
         return findValueImpl(U, OffsetOk, Visited);
       if (BBI != BB->begin())
         break;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 97d21db86abf2..6bf0d2f56eb4e 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
            "to scan backward from a given instruction, when searching for "
            "available loaded value"));
 
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
-                                      BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AAResults *AA, bool *IsLoad,
+                                      BatchAAResults *AA, bool *IsLoad,
                                       unsigned *NumScanedInst) {
   // Don't CSE load that is volatile or anything stronger than unordered.
   if (!Load->isUnordered())
@@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
 Value *llvm::findAvailablePtrLoadStore(
     const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
     BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
-    AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
+    BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
@@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore(
   return nullptr;
 }
 
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
                                       bool *IsLoadCSE,
                                       unsigned MaxInstsToScan) {
   const DataLayout &DL = Load->getModule()->getDataLayout();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a7..1254a050027a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
   bool IsLoadCSE = false;
-  if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
+  BatchAAResults BatchAA(*AA);
+  if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
     if (IsLoadCSE)
       combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
 
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 8603c5cf9c022..d7d689f58a070 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   // the entry to its block.
   BasicBlock::iterator BBIt(LoadI);
   bool IsLoadCSE;
+  BatchAAResults BatchAA(*AA);
   if (Value *AvailableVal = FindAvailableLoadedValue(
-          LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+          LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
     // If the value of the load is locally available within the block, just use
     // it.  This frequently occurs for reg2mem'd allocas.
 
@@ -1322,9 +1323,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
     MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
                        LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
                        AATags);
-    PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
-                                              PredBB, BBIt, DefMaxInstsToScan,
-                                              AA, &IsLoadCSE, &NumScanedInst);
+    PredAvailable = findAvailablePtrLoadStore(
+        Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+        &BatchAA, &IsLoadCSE, &NumScanedInst);
 
     // If PredBB has a single predecessor, continue scanning through the
     // single predecessor.
@@ -1336,7 +1337,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
         BBIt = SinglePredBB->end();
         PredAvailable = findAvailablePtrLoadStore(
             Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
-            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+            (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
             &NumScanedInst);
       }
     }

>From 91d95bb02eebb8f3ea2f1eb1e722feac4ce3b2ed Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 24 Jan 2024 15:03:24 +0100
Subject: [PATCH 2/3] [JumpThreading] Add test for #79175 (NFC)

(cherry picked from commit 7143b451d71fe314730f7610d7908e3b9611815c)
---
 llvm/test/Transforms/JumpThreading/pr79175.ll | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 llvm/test/Transforms/JumpThreading/pr79175.ll

diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
new file mode 100644
index 0000000000000..6815aabb26dfc
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=jump-threading < %s | FileCheck %s
+
+ at f = external global i32
+
+; Make sure the value of @f is reloaded prior to the final comparison.
+; FIXME: This is a miscompile.
+define i32 @test(i64 %idx, i32 %val) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IDX]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[RETURN:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[F:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[F]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END_THREAD:%.*]], label [[COND_END:%.*]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
+; CHECK:       cond.end.thread:
+; CHECK-NEXT:    [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT:    br label [[TMP0]]
+; CHECK:       0:
+; CHECK-NEXT:    [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
+; CHECK-NEXT:    [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[F_IDX]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
+; CHECK-NEXT:    br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       return2:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %cmp = icmp slt i64 %idx, 1
+  br i1 %cmp, label %for.body, label %return
+
+for.body:
+  %f = load i32, ptr @f, align 4
+  %cmp1 = icmp eq i32 %f, 0
+  br i1 %cmp1, label %cond.end, label %cond.false
+
+cond.false:
+  br label %cond.end
+
+cond.end:
+  %phi = phi i32 [ %val, %cond.false ], [ 1, %for.body ]
+  %cmp.i = icmp sgt i32 %phi, 0
+  %sel = select i1 %cmp.i, i32 0, i32 %phi
+  %f.idx = getelementptr inbounds i32, ptr @f, i64 %idx
+  store i32 %sel, ptr %f.idx, align 4
+  %f.reload = load i32, ptr @f, align 4
+  %cmp3 = icmp slt i32 %f.reload, 1
+  br i1 %cmp3, label %return2, label %return
+
+return:
+  ret i32 0
+
+return2:
+  ret i32 1
+}

>From 9a7c70234b1730815f89f4a68030e62f7cd75cdc Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 31 Jan 2024 15:23:53 +0100
Subject: [PATCH 3/3] [AA][JumpThreading] Don't use DomTree for AA in
 JumpThreading (#79294)

JumpThreading may perform AA queries while the dominator tree is not up
to date, which may result in miscompilations.

Fix this by adding a new AAQI option to disable the use of the dominator
tree in BasicAA.

Fixes https://github.com/llvm/llvm-project/issues/79175.

(cherry picked from commit 4f32f5d5720fbef06672714a62376f236a36aef5)
---
 llvm/include/llvm/Analysis/AliasAnalysis.h      |  7 +++++++
 llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 14 ++++++++++----
 llvm/lib/Analysis/BasicAliasAnalysis.cpp        |  6 ++++--
 llvm/lib/Transforms/Scalar/JumpThreading.cpp    |  2 ++
 llvm/test/Transforms/JumpThreading/pr79175.ll   |  4 +---
 5 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6f732d35fd4c..e8e4f491be5a3 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ class AAQueryInfo {
   ///   store %l, ...
   bool MayBeCrossIteration = false;
 
+  /// Whether alias analysis is allowed to use the dominator tree, for use by
+  /// passes that lazily update the DT while performing AA queries.
+  bool UseDominatorTree = true;
+
   AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
 };
 
@@ -668,6 +672,9 @@ class BatchAAResults {
   void enableCrossIterationMode() {
     AAQI.MayBeCrossIteration = true;
   }
+
+  /// Disable the use of the dominator tree during alias analysis queries.
+  void disableDominatorTree() { AAQI.UseDominatorTree = false; }
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index afc1811239f28..7eca82729430d 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
   const Function &F;
   const TargetLibraryInfo &TLI;
   AssumptionCache &AC;
-  DominatorTree *DT;
+  /// Use getDT() instead of accessing this member directly, in order to
+  /// respect the AAQI.UseDominatorTree option.
+  DominatorTree *DT_;
+
+  DominatorTree *getDT(const AAQueryInfo &AAQI) const {
+    return AAQI.UseDominatorTree ? DT_ : nullptr;
+  }
 
 public:
   BasicAAResult(const DataLayout &DL, const Function &F,
                 const TargetLibraryInfo &TLI, AssumptionCache &AC,
                 DominatorTree *DT = nullptr)
-      : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
+      : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}
 
   BasicAAResult(const BasicAAResult &Arg)
       : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
-        DT(Arg.DT) {}
+        DT_(Arg.DT_) {}
   BasicAAResult(BasicAAResult &&Arg)
       : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
-        AC(Arg.AC), DT(Arg.DT) {}
+        AC(Arg.AC), DT_(Arg.DT_) {}
 
   /// Handle invalidation events in the new pass manager.
   bool invalidate(Function &Fn, const PreservedAnalyses &PA,
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3178e2d278167..1028b52a79123 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
   // may be created without handles to some analyses and in that case don't
   // depend on them.
   if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
-      (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
+      (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
     return true;
 
   // Otherwise this analysis result remains valid.
@@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP(
                                              : AliasResult::MayAlias;
   }
 
+  DominatorTree *DT = getDT(AAQI);
   DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
   DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
 
@@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
         const Value *HintO1 = getUnderlyingObject(Hint1);
         const Value *HintO2 = getUnderlyingObject(Hint2);
 
+        DominatorTree *DT = getDT(AAQI);
         auto ValidAssumeForPtrContext = [&](const Value *Ptr) {
           if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) {
             return isValidAssumeForContext(Assume, PtrI, DT,
@@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
   if (!Inst || Inst->getParent()->isEntryBlock())
     return true;
 
-  return isNotInCycle(Inst, DT, /*LI*/ nullptr);
+  return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr);
 }
 
 /// Computes the symbolic difference between two de-composed GEPs.
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index d7d689f58a070..87c01ead634ff 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1261,6 +1261,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   BasicBlock::iterator BBIt(LoadI);
   bool IsLoadCSE;
   BatchAAResults BatchAA(*AA);
+  // The dominator tree is updated lazily and may not be valid at this point.
+  BatchAA.disableDominatorTree();
   if (Value *AvailableVal = FindAvailableLoadedValue(
           LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
     // If the value of the load is locally available within the block, just use
diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
index 6815aabb26dfc..2c7ee0770cdc7 100644
--- a/llvm/test/Transforms/JumpThreading/pr79175.ll
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -4,7 +4,6 @@
 @f = external global i32
 
 ; Make sure the value of @f is reloaded prior to the final comparison.
-; FIXME: This is a miscompile.
 define i32 @test(i64 %idx, i32 %val) {
 ; CHECK-LABEL: define i32 @test(
 ; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
@@ -20,13 +19,12 @@ define i32 @test(i64 %idx, i32 %val) {
 ; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
 ; CHECK-NEXT:    br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
 ; CHECK:       cond.end.thread:
-; CHECK-NEXT:    [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4
 ; CHECK-NEXT:    br label [[TMP0]]
 ; CHECK:       0:
-; CHECK-NEXT:    [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
 ; CHECK-NEXT:    [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[F_IDX]], align 4
+; CHECK-NEXT:    [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
 ; CHECK-NEXT:    br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
 ; CHECK:       return:



More information about the llvm-branch-commits mailing list