[llvm] [ConstraintElim] Use constraints from bounded memory accesses (PR #155253)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 30 23:33:56 PDT 2025


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/155253

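For context: the series teaches ConstraintElimination that a load or store
through a GEP with a single variable index into an object of known size
implies a bound on that index. A minimal sketch of the idea, using the
5-byte global @g from the tests below:

  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
  %load = load i8, ptr %gep     ; in bounds only if %idx u< 5
  %cmp = icmp ult i64 %idx, 5   ; can therefore be folded to true
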
From 7129198e85d62b79025dc7f5ab861d9d3d244318 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 25 Aug 2025 22:17:22 +0800
Subject: [PATCH 1/6] [ConstraintElim] Add pre-commit tests. NFC.

---
 .../implied-by-bounded-memory-access.ll       | 261 ++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100644 llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll

diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
new file mode 100644
index 0000000000000..d7b66a03cb113
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+@g = private unnamed_addr constant [5 x i8] c"test\00"
+
+declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare void @callee(i1)
+
+define i8 @load_global(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i1 @store_global(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  store i8 0, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
+define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_byval(
+; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_alloca(i64 %idx) {
+; CHECK-LABEL: define i8 @load_alloca(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[ALLOC:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %alloc = alloca [5 x i8], align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_malloc(i64 %idx) {
+; CHECK-LABEL: define i8 @load_malloc(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 5)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    call void @free(ptr [[ALLOC]])
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %alloc = call ptr @malloc(i64 5)
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  call void @free(ptr %alloc)
+  ret i8 %add
+}
+
+define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 7
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 7
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    call void @callee(i1 [[CMP1]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    br label %[[NEXT:.*]]
+; CHECK:       [[NEXT]]:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP2]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %cmp1 = icmp ult i64 %idx, 5
+  call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+  %load = load i8, ptr %gep
+  br label %next
+
+next:
+  %cmp2 = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp2 to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+; Negative tests.
+
+define i8 @load_from_non_gep(ptr %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_non_gep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %load = load i8, ptr %p
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: define i8 @load_global_multi_indices(
+; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
+  %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+  %load = load i8, ptr %gep2
+  %cmp = icmp ult i64 %idx1, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_without_inbounds(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_inbounds(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 6
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load volatile i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_vscale(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_vscale(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %load = load <vscale x 1 x i8>, ptr %gep
+  %ext = extractelement <vscale x 1 x i8> %load, i64 0
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %ext, %zext
+  ret i8 %add
+}

From d11c4d1c391f269badcf675f301b982ee25cf742 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 25 Aug 2025 22:50:47 +0800
Subject: [PATCH 2/6] [ConstraintElim] Use constraints from bounded memory
 accesses

---
 .../Scalar/ConstraintElimination.cpp          | 102 ++++++++++++++++--
 .../implied-by-bounded-memory-access.ll       |  21 ++--
 2 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 1ddb8ae9518fc..f15281f09e74b 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,9 +19,11 @@
 #include "llvm/Analysis/ConstraintSystem.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
   DominatorTree &DT;
   LoopInfo &LI;
   ScalarEvolution &SE;
+  TargetLibraryInfo &TLI;
   SmallVector<FactOrCheck, 64> WorkList;
 
-  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
-      : DT(DT), LI(LI), SE(SE) {}
+  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
+        TargetLibraryInfo &TLI)
+      : DT(DT), LI(LI), SE(SE), TLI(TLI) {}
 
   /// Process block \p BB and add known facts to work-list.
   void addInfoFor(BasicBlock &BB);
@@ -1109,8 +1113,39 @@ void State::addInfoForInductions(BasicBlock &BB) {
   }
 }
 
+static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
+                                          uint64_t AccessSize,
+                                          CmpPredicate &Pred, Value *&A,
+                                          Value *&B, const DataLayout &DL,
+                                          const TargetLibraryInfo &TLI) {
+  auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
+  if (!Offset.NW.isInBounds())
+    return false;
+
+  if (Offset.VariableOffsets.size() != 1)
+    return false;
+
+  ObjectSizeOpts Opts;
+  ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
+  SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
+  if (!Data.bothKnown() || !Data.Offset.isZero())
+    return false;
+
+  // Index * Scale + ConstOffset + AccessSize <= AllocSize
+  uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
+  auto &[Index, Scale] = Offset.VariableOffsets.front();
+  APInt MaxIndex =
+      (Data.Size - APInt(BitWidth, AccessSize) - Offset.ConstantOffset)
+          .udiv(Scale);
+  Pred = ICmpInst::ICMP_ULE;
+  A = Index;
+  B = ConstantInt::get(Index->getType(), MaxIndex);
+  return true;
+}
+
 void State::addInfoFor(BasicBlock &BB) {
   addInfoForInductions(BB);
+  auto &DL = BB.getDataLayout();
 
   // True as long as long as the current instruction is guaranteed to execute.
   bool GuaranteedToExecute = true;
@@ -1127,6 +1162,38 @@ void State::addInfoFor(BasicBlock &BB) {
       continue;
     }
 
+    auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
+      auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      if (!GEP)
+        return;
+      TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
+      if (!AccessSize.isFixed())
+        return;
+      if (GuaranteedToExecute) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
+                                          Pred, A, B, DL, TLI)) {
+          // The memory access is guaranteed to execute when BB is entered,
+          // hence the constraint holds on entry to BB.
+          WorkList.emplace_back(FactOrCheck::getConditionFact(
+              DT.getNode(I.getParent()), Pred, A, B));
+        }
+      } else {
+        WorkList.emplace_back(
+            FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
+      }
+    };
+
+    if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (LI->isSimple())
+        AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
+    }
+    if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (SI->isSimple())
+        AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
+    }
+
     auto *II = dyn_cast<IntrinsicInst>(&I);
     Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
     switch (ID) {
@@ -1420,7 +1487,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
   LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
 
   auto R = Info.getConstraintForSolving(Pred, A, B);
-  if (R.empty() || !R.isValid(Info)){
+  if (R.empty() || !R.isValid(Info)) {
     LLVM_DEBUG(dbgs() << "   failed to decompose condition\n");
     return std::nullopt;
   }
@@ -1785,12 +1852,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
                                  ScalarEvolution &SE,
-                                 OptimizationRemarkEmitter &ORE) {
+                                 OptimizationRemarkEmitter &ORE,
+                                 TargetLibraryInfo &TLI) {
   bool Changed = false;
   DT.updateDFSNumbers();
   SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
-  State S(DT, LI, SE);
+  State S(DT, LI, SE, TLI);
   std::unique_ptr<Module> ReproducerModule(
       DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
 
@@ -1960,6 +2028,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
         }
         continue;
       }
+
+      auto &DL = F.getDataLayout();
+      auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(
+                *cast<GetElementPtrInst>(Ptr),
+                DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
+                TLI))
+          AddFact(Pred, A, B);
+      };
+
+      if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
+        AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
+        continue;
+      }
+      if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
+        AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
+        continue;
+      }
     }
 
     Value *A = nullptr, *B = nullptr;
@@ -2018,10 +2106,12 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  if (!eliminateConstraints(F, DT, LI, SE, ORE))
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
+  PA.preserve<TargetLibraryAnalysis>();
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<LoopAnalysis>();
   PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index d7b66a03cb113..c2f2ff1a701c3 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -12,8 +12,7 @@ define i8 @load_global(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
@@ -30,8 +29,7 @@ define i1 @store_global(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %gep = getelementptr inbounds i8, ptr @g, i64 %idx
   store i8 0, ptr %gep
@@ -44,8 +42,7 @@ define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
 ; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
@@ -64,8 +61,7 @@ define i8 @load_alloca(i64 %idx) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
@@ -86,8 +82,7 @@ define i8 @load_malloc(i64 %idx) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    call void @free(ptr [[ALLOC]])
 ; CHECK-NEXT:    ret i8 [[ADD]]
@@ -108,8 +103,7 @@ define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
 ; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 7
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
@@ -130,8 +124,7 @@ define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    br label %[[NEXT:.*]]
 ; CHECK:       [[NEXT]]:
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP2]] to i8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;

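For reference, the bound derived by getConstraintFromMemoryAccess follows
from Index * Scale + ConstOffset + AccessSize <= AllocSize, i.e.
MaxIndex = (AllocSize - AccessSize - ConstOffset) udiv Scale. A worked
instance matching the load_byval_i32 test (assuming the usual 4-byte store
size for i32):

  AllocSize = 10 (byval [10 x i8]), AccessSize = 4, ConstOffset = 0, Scale = 1
  MaxIndex  = (10 - 4 - 0) udiv 1 = 6
  => %idx u<= 6, so `icmp ult i64 %idx, 7` folds to true

If the access is guaranteed to execute, the fact is added as a condition
fact holding on entry to the block; otherwise it is queued as an instruction
fact, as exercised by load_global_may_noreturn_dom_bb.
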
From 9eabc1c6505a31d88b66212f636d0a51c923791c Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 26 Aug 2025 00:14:27 +0800
Subject: [PATCH 3/6] [ConstraintElim] Bail out on null base

---
 .../Scalar/ConstraintElimination.cpp           |  2 ++
 .../implied-by-bounded-memory-access.ll        | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index f15281f09e74b..824a50d8ae68e 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1126,6 +1126,8 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
     return false;
 
   ObjectSizeOpts Opts;
+  // Workaround for `gep inbounds, ptr null, idx`.
+  Opts.NullIsUnknownSize = true;
   ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
   SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
   if (!Data.bothKnown() || !Data.Offset.isZero())
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index c2f2ff1a701c3..e3ac4ee5d1c2a 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -252,3 +252,21 @@ define i8 @load_global_vscale(i64 %idx) {
   %add = add i8 %ext, %zext
   ret i8 %add
 }
+
+define i8 @load_from_null(i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_null(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}

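For context: with the default ObjectSizeOpts, a null base in address space 0
is reported as a zero-sized object, so the bound computation would wrap (a
sketch for an i8 access with Scale = 1):

  MaxIndex = (0 - 1 - 0) udiv 1 = 2^64 - 1

which is meaningless. With NullIsUnknownSize set, bothKnown() fails for the
null base and getConstraintFromMemoryAccess bails out, as exercised by the
new load_from_null test.
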
From 8e8a43b5d6117e33372142c38eb34420404a7e9f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 26 Aug 2025 00:16:52 +0800
Subject: [PATCH 4/6] Fix typo. NFC.

---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 824a50d8ae68e..d4b1cb80b5803 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1149,7 +1149,7 @@ void State::addInfoFor(BasicBlock &BB) {
   addInfoForInductions(BB);
   auto &DL = BB.getDataLayout();
 
-  // True as long as long as the current instruction is guaranteed to execute.
+  // True as long as the current instruction is guaranteed to execute.
   bool GuaranteedToExecute = true;
   // Queue conditions and assumes.
   for (Instruction &I : BB) {

From a1f5d8a25ab30a958ca87bf87f6a2536fcc1aede Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 27 Aug 2025 00:55:23 +0800
Subject: [PATCH 5/6] [ConstraintElim] Address review comments.

---
 .../Scalar/ConstraintElimination.cpp          |  6 +-
 .../implied-by-bounded-memory-access.ll       | 62 +++++++++----------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index d4b1cb80b5803..32eda0468bb64 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1119,7 +1119,7 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
                                           Value *&B, const DataLayout &DL,
                                           const TargetLibraryInfo &TLI) {
   auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
-  if (!Offset.NW.isInBounds())
+  if (!Offset.NW.hasNoUnsignedWrap())
     return false;
 
   if (Offset.VariableOffsets.size() != 1)
@@ -1134,6 +1134,9 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
     return false;
 
   // Index * Scale + ConstOffset + AccessSize <= AllocSize
+  // With the nuw flag, we know that the index addition does not wrap in the
+  // unsigned sense. If (AllocSize - (ConstOffset + AccessSize)) wraps
+  // around, there is no valid value for Index.
   uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
   auto &[Index, Scale] = Offset.VariableOffsets.front();
   APInt MaxIndex =
@@ -2113,7 +2116,6 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
-  PA.preserve<TargetLibraryAnalysis>();
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<LoopAnalysis>();
   PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index e3ac4ee5d1c2a..ca7aa76866795 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -5,18 +5,18 @@
 
 declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
 declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
-declare void @callee(i1)
+declare void @may_not_return(i1)
 
 define i8 @load_global(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_global(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
   %load = load i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8
@@ -27,11 +27,11 @@ define i8 @load_global(i64 %idx) {
 define i1 @store_global(i64 %idx) {
 ; CHECK-LABEL: define i1 @store_global(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
 ; CHECK-NEXT:    ret i1 true
 ;
-  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
   store i8 0, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   ret i1 %cmp
@@ -40,13 +40,13 @@ define i1 @store_global(i64 %idx) {
 define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
 ; CHECK-LABEL: define i8 @load_byval(
 ; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
   %load = load i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8
@@ -59,7 +59,7 @@ define i8 @load_alloca(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[ALLOC:%.*]] = alloca [5 x i8], align 1
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
@@ -67,7 +67,7 @@ define i8 @load_alloca(i64 %idx) {
 ;
   %alloc = alloca [5 x i8], align 1
   call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
-  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
   %load = load i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8
@@ -80,7 +80,7 @@ define i8 @load_malloc(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 5)
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
@@ -89,7 +89,7 @@ define i8 @load_malloc(i64 %idx) {
 ;
   %alloc = call ptr @malloc(i64 5)
   call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
-  %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+  %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
   %load = load i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8
@@ -101,13 +101,13 @@ define i8 @load_malloc(i64 %idx) {
 define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
 ; CHECK-LABEL: define i32 @load_byval_i32(
 ; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
   %load = load i32, ptr %gep
   %cmp = icmp ult i64 %idx, 7
   %zext = zext i1 %cmp to i32
@@ -118,9 +118,9 @@ define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
 define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT:    call void @callee(i1 [[CMP1]])
+; CHECK-NEXT:    call void @may_not_return(i1 [[CMP1]])
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    br label %[[NEXT:.*]]
 ; CHECK:       [[NEXT]]:
@@ -128,9 +128,9 @@ define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
   %cmp1 = icmp ult i64 %idx, 5
-  call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+  call void @may_not_return(i1 %cmp1) ; %cmp1 should not be simplified.
   %load = load i8, ptr %gep
   br label %next
 
@@ -162,16 +162,16 @@ define i8 @load_from_non_gep(ptr %p, i64 %idx) {
 define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
 ; CHECK-LABEL: define i8 @load_global_multi_indices(
 ; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i8, ptr [[GEP1]], i64 [[IDX2]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
-  %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+  %gep1 = getelementptr nuw i8, ptr @g, i64 %idx1
+  %gep2 = getelementptr nuw i8, ptr %gep1, i64 %idx2
   %load = load i8, ptr %gep2
   %cmp = icmp ult i64 %idx1, 5
   %zext = zext i1 %cmp to i8
@@ -179,8 +179,8 @@ define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
   ret i8 %add
 }
 
-define i8 @load_global_without_inbounds(i64 %idx) {
-; CHECK-LABEL: define i8 @load_global_without_inbounds(
+define i8 @load_global_without_nuw(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_nuw(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
@@ -200,14 +200,14 @@ define i8 @load_global_without_inbounds(i64 %idx) {
 define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
 ; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
 ; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
   %load = load i32, ptr %gep
   %cmp = icmp ult i64 %idx, 6
   %zext = zext i1 %cmp to i32
@@ -218,14 +218,14 @@ define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
 define i8 @load_global_volatile(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_global_volatile(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
   %load = load volatile i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8
@@ -236,7 +236,7 @@ define i8 @load_global_volatile(i64 %idx) {
 define i8 @load_global_vscale(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_global_vscale(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
@@ -244,7 +244,7 @@ define i8 @load_global_vscale(i64 %idx) {
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
   %load = load <vscale x 1 x i8>, ptr %gep
   %ext = extractelement <vscale x 1 x i8> %load, i64 0
   %cmp = icmp ult i64 %idx, 5
@@ -256,14 +256,14 @@ define i8 @load_global_vscale(i64 %idx) {
 define i8 @load_from_null(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_from_null(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr null, i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-  %gep = getelementptr inbounds i8, ptr null, i64 %idx
+  %gep = getelementptr nuw i8, ptr null, i64 %idx
   %load = load i8, ptr %gep
   %cmp = icmp ult i64 %idx, 5
   %zext = zext i1 %cmp to i8

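A note on the inbounds -> nuw change: the inequality
Index * Scale + ConstOffset + AccessSize <= AllocSize is an unsigned one,
and inbounds only implies nusw, so nuw is what guarantees the offset
arithmetic does not wrap in the unsigned sense. A sketch mirroring the
updated load_global test:

  %gep = getelementptr nuw i8, ptr @g, i64 %idx
  %load = load i8, ptr %gep     ; implies %idx u<= 4
  %cmp = icmp ult i64 %idx, 5   ; folds to true
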
From d28609be7d6a0862791a9d3a0e2d4175c8ff13f3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Aug 2025 14:33:35 +0800
Subject: [PATCH 6/6] [ConstraintElim] Address review comments.

---
 .../Scalar/ConstraintElimination.cpp          |  7 +-
 .../implied-by-bounded-memory-access.ll       | 82 +++++++++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 32eda0468bb64..ffa96826129be 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1128,6 +1128,9 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
   ObjectSizeOpts Opts;
   // Workaround for `gep inbounds, ptr null, idx`.
   Opts.NullIsUnknownSize = true;
+  // Be conservative, since it is unclear whether an out-of-bounds access to
+  // the padding is UB or not.
+  Opts.RoundToAlign = true;
   ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
   SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
   if (!Data.bothKnown() || !Data.Offset.isZero())
@@ -1191,11 +1194,11 @@ void State::addInfoFor(BasicBlock &BB) {
     };
 
     if (auto *LI = dyn_cast<LoadInst>(&I)) {
-      if (LI->isSimple())
+      if (!LI->isVolatile())
         AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
     }
     if (auto *SI = dyn_cast<StoreInst>(&I)) {
-      if (SI->isSimple())
+      if (!SI->isVolatile())
         AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
     }
 
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index ca7aa76866795..5338ac52db962 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -2,6 +2,8 @@
 ; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
 
 @g = private unnamed_addr constant [5 x i8] c"test\00"
+@g_overaligned = private unnamed_addr constant [5 x i8] c"test\00", align 8
+@g_external = external global [5 x i8]
 
 declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
 declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
@@ -24,6 +26,23 @@ define i8 @load_global(i64 %idx) {
   ret i8 %add
 }
 
+define i8 @load_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load atomic i8, ptr [[GEP]] unordered, align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %load = load atomic i8, ptr %gep unordered, align 1
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
 define i1 @store_global(i64 %idx) {
 ; CHECK-LABEL: define i1 @store_global(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
@@ -37,6 +56,19 @@ define i1 @store_global(i64 %idx) {
   ret i1 %cmp
 }
 
+define i1 @store_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    store atomic i8 0, ptr [[GEP]] release, align 1
+; CHECK-NEXT:    ret i1 true
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  store atomic i8 0, ptr %gep release, align 1
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
 define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
 ; CHECK-LABEL: define i8 @load_byval(
 ; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
@@ -143,6 +175,42 @@ next:
 
 ; Negative tests.
 
+define i8 @load_global_overaligned(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_overaligned(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g_overaligned, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g_overaligned, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_external(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_external(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g_external, i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g_external, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
 define i8 @load_from_non_gep(ptr %p, i64 %idx) {
 ; CHECK-LABEL: define i8 @load_from_non_gep(
 ; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
@@ -233,6 +301,20 @@ define i8 @load_global_volatile(i64 %idx) {
   ret i8 %add
 }
 
+define i1 @store_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT:    store volatile i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  store volatile i8 0, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
 define i8 @load_global_vscale(i64 %idx) {
 ; CHECK-LABEL: define i8 @load_global_vscale(
 ; CHECK-SAME: i64 [[IDX:%.*]]) {

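Two details of this revision are illustrated by the new tests: non-volatile
atomic accesses are now accepted (isSimple() is relaxed to !isVolatile()),
and RoundToAlign rounds the object size up to its alignment so padding is
treated conservatively. For @g_overaligned (5 bytes, align 8) the rounded
size is 8, so only a weaker fact is derived (sketch for an i8 access):

  MaxIndex = (8 - 1 - 0) udiv 1 = 7
  => %idx u<= 7, which does not imply `icmp ult i64 %idx, 5`

which is why load_global_overaligned stays a negative test.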

