[llvm] [ConstraintElim] Use constraints from bounded memory accesses (PR #155253)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 30 23:33:56 PDT 2025
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/155253
>From 7129198e85d62b79025dc7f5ab861d9d3d244318 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 25 Aug 2025 22:17:22 +0800
Subject: [PATCH 1/6] [ConstraintElim] Add pre-commit tests. NFC.
---
.../implied-by-bounded-memory-access.ll | 261 ++++++++++++++++++
1 file changed, 261 insertions(+)
create mode 100644 llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
new file mode 100644
index 0000000000000..d7b66a03cb113
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+@g = private unnamed_addr constant [5 x i8] c"test\00"
+
+declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare void @callee(i1)
+
+define i8 @load_global(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i1 @store_global(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ store i8 0, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ ret i1 %cmp
+}
+
+define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_byval(
+; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_alloca(i64 %idx) {
+; CHECK-LABEL: define i8 @load_alloca(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %alloc = alloca [5 x i8], align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+ %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_malloc(i64 %idx) {
+; CHECK-LABEL: define i8 @load_malloc(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 5)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: call void @free(ptr [[ALLOC]])
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %alloc = call ptr @malloc(i64 5)
+ call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+ %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ call void @free(ptr %alloc)
+ ret i8 %add
+}
+
+define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 7
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i32, ptr %gep
+ %cmp = icmp ult i64 %idx, 7
+ %zext = zext i1 %cmp to i32
+ %add = add i32 %load, %zext
+ ret i32 %add
+}
+
+define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: call void @callee(i1 [[CMP1]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: br label %[[NEXT:.*]]
+; CHECK: [[NEXT]]:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP2]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %cmp1 = icmp ult i64 %idx, 5
+ call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+ %load = load i8, ptr %gep
+ br label %next
+
+next:
+ %cmp2 = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp2 to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+; Negative tests.
+
+define i8 @load_from_non_gep(ptr %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_non_gep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %load = load i8, ptr %p
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: define i8 @load_global_multi_indices(
+; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
+ %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+ %load = load i8, ptr %gep2
+ %cmp = icmp ult i64 %idx1, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_without_inbounds(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_inbounds(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr i8, ptr @g, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %load = load i32, ptr %gep
+ %cmp = icmp ult i64 %idx, 6
+ %zext = zext i1 %cmp to i32
+ %add = add i32 %load, %zext
+ ret i32 %add
+}
+
+define i8 @load_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load volatile i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_vscale(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_vscale(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %load = load <vscale x 1 x i8>, ptr %gep
+ %ext = extractelement <vscale x 1 x i8> %load, i64 0
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %ext, %zext
+ ret i8 %add
+}
>From d11c4d1c391f269badcf675f301b982ee25cf742 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 25 Aug 2025 22:50:47 +0800
Subject: [PATCH 2/6] [ConstraintElim] Use constraint from bounded memory accesses
---
.../Scalar/ConstraintElimination.cpp | 102 ++++++++++++++++--
.../implied-by-bounded-memory-access.ll | 21 ++--
2 files changed, 103 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 1ddb8ae9518fc..f15281f09e74b 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,9 +19,11 @@
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
DominatorTree &DT;
LoopInfo &LI;
ScalarEvolution &SE;
+ TargetLibraryInfo &TLI;
SmallVector<FactOrCheck, 64> WorkList;
- State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
- : DT(DT), LI(LI), SE(SE) {}
+ State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
+ TargetLibraryInfo &TLI)
+ : DT(DT), LI(LI), SE(SE), TLI(TLI) {}
/// Process block \p BB and add known facts to work-list.
void addInfoFor(BasicBlock &BB);
@@ -1109,8 +1113,39 @@ void State::addInfoForInductions(BasicBlock &BB) {
}
}
+static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
+ uint64_t AccessSize,
+ CmpPredicate &Pred, Value *&A,
+ Value *&B, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
+ auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
+ if (!Offset.NW.isInBounds())
+ return false;
+
+ if (Offset.VariableOffsets.size() != 1)
+ return false;
+
+ ObjectSizeOpts Opts;
+ ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
+ SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
+ if (!Data.bothKnown() || !Data.Offset.isZero())
+ return false;
+
+ // Index * Scale + ConstOffset + AccessSize <= AllocSize
+ uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
+ auto &[Index, Scale] = Offset.VariableOffsets.front();
+ APInt MaxIndex =
+ (Data.Size - APInt(BitWidth, AccessSize) - Offset.ConstantOffset)
+ .udiv(Scale);
+ Pred = ICmpInst::ICMP_ULE;
+ A = Index;
+ B = ConstantInt::get(Index->getType(), MaxIndex);
+ return true;
+}
+
void State::addInfoFor(BasicBlock &BB) {
addInfoForInductions(BB);
+ auto &DL = BB.getDataLayout();
// True as long as long as the current instruction is guaranteed to execute.
bool GuaranteedToExecute = true;
@@ -1127,6 +1162,38 @@ void State::addInfoFor(BasicBlock &BB) {
continue;
}
+ auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return;
+ TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
+ if (!AccessSize.isFixed())
+ return;
+ if (GuaranteedToExecute) {
+ CmpPredicate Pred;
+ Value *A, *B;
+ if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
+ Pred, A, B, DL, TLI)) {
+ // The memory access is guaranteed to execute when BB is entered,
+ // hence the constraint holds on entry to BB.
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(I.getParent()), Pred, A, B));
+ }
+ } else {
+ WorkList.emplace_back(
+ FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
+ }
+ };
+
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isSimple())
+ AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
+ }
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isSimple())
+ AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
+ }
+
auto *II = dyn_cast<IntrinsicInst>(&I);
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
switch (ID) {
@@ -1420,7 +1487,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
auto R = Info.getConstraintForSolving(Pred, A, B);
- if (R.empty() || !R.isValid(Info)){
+ if (R.empty() || !R.isValid(Info)) {
LLVM_DEBUG(dbgs() << " failed to decompose condition\n");
return std::nullopt;
}
@@ -1785,12 +1852,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE,
- OptimizationRemarkEmitter &ORE) {
+ OptimizationRemarkEmitter &ORE,
+ TargetLibraryInfo &TLI) {
bool Changed = false;
DT.updateDFSNumbers();
SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
- State S(DT, LI, SE);
+ State S(DT, LI, SE, TLI);
std::unique_ptr<Module> ReproducerModule(
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
@@ -1960,6 +2028,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
}
continue;
}
+
+ auto &DL = F.getDataLayout();
+ auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
+ CmpPredicate Pred;
+ Value *A, *B;
+ if (getConstraintFromMemoryAccess(
+ *cast<GetElementPtrInst>(Ptr),
+ DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
+ TLI))
+ AddFact(Pred, A, B);
+ };
+
+ if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
+ AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
+ continue;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
+ AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
+ continue;
+ }
}
Value *A = nullptr, *B = nullptr;
@@ -2018,10 +2106,12 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- if (!eliminateConstraints(F, DT, LI, SE, ORE))
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ PA.preserve<TargetLibraryAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index d7b66a03cb113..c2f2ff1a701c3 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -12,8 +12,7 @@ define i8 @load_global(i64 %idx) {
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -30,8 +29,7 @@ define i1 @store_global(i64 %idx) {
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
%gep = getelementptr inbounds i8, ptr @g, i64 %idx
store i8 0, ptr %gep
@@ -44,8 +42,7 @@ define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -64,8 +61,7 @@ define i8 @load_alloca(i64 %idx) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -86,8 +82,7 @@ define i8 @load_malloc(i64 %idx) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: call void @free(ptr [[ALLOC]])
; CHECK-NEXT: ret i8 [[ADD]]
@@ -108,8 +103,7 @@ define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 7
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i32 [[ADD]]
;
@@ -130,8 +124,7 @@ define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: br label %[[NEXT:.*]]
; CHECK: [[NEXT]]:
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP2]] to i8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
>From 9eabc1c6505a31d88b66212f636d0a51c923791c Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 26 Aug 2025 00:14:27 +0800
Subject: [PATCH 3/6] [ConstraintElim] Bail out on null base
---
.../Scalar/ConstraintElimination.cpp | 2 ++
.../implied-by-bounded-memory-access.ll | 18 ++++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index f15281f09e74b..824a50d8ae68e 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1126,6 +1126,8 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
return false;
ObjectSizeOpts Opts;
+ // Workaround for gep inbounds, ptr null, idx.
+ Opts.NullIsUnknownSize = true;
ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
if (!Data.bothKnown() || !Data.Offset.isZero())
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index c2f2ff1a701c3..e3ac4ee5d1c2a 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -252,3 +252,21 @@ define i8 @load_global_vscale(i64 %idx) {
%add = add i8 %ext, %zext
ret i8 %add
}
+
+define i8 @load_from_null(i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_null(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr inbounds i8, ptr null, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
>From 8e8a43b5d6117e33372142c38eb34420404a7e9f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 26 Aug 2025 00:16:52 +0800
Subject: [PATCH 4/6] Fix typo. NFC.
---
llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 824a50d8ae68e..d4b1cb80b5803 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1149,7 +1149,7 @@ void State::addInfoFor(BasicBlock &BB) {
addInfoForInductions(BB);
auto &DL = BB.getDataLayout();
- // True as long as long as the current instruction is guaranteed to execute.
+ // True as long as the current instruction is guaranteed to execute.
bool GuaranteedToExecute = true;
// Queue conditions and assumes.
for (Instruction &I : BB) {
>From a1f5d8a25ab30a958ca87bf87f6a2536fcc1aede Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 27 Aug 2025 00:55:23 +0800
Subject: [PATCH 5/6] [ConstraintElim] Address review comments.
---
.../Scalar/ConstraintElimination.cpp | 6 +-
.../implied-by-bounded-memory-access.ll | 62 +++++++++----------
2 files changed, 35 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index d4b1cb80b5803..32eda0468bb64 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1119,7 +1119,7 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
Value *&B, const DataLayout &DL,
const TargetLibraryInfo &TLI) {
auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
- if (!Offset.NW.isInBounds())
+ if (!Offset.NW.hasNoUnsignedWrap())
return false;
if (Offset.VariableOffsets.size() != 1)
@@ -1134,6 +1134,9 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
return false;
// Index * Scale + ConstOffset + AccessSize <= AllocSize
+ // With nuw flag, we know that the index addition doesn't have unsigned wrap.
+ // If (AllocSize - (ConstOffset + AccessSize)) wraps around, there is no valid
+ // value for Index.
uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
auto &[Index, Scale] = Offset.VariableOffsets.front();
APInt MaxIndex =
@@ -2113,7 +2116,6 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<TargetLibraryAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index e3ac4ee5d1c2a..ca7aa76866795 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -5,18 +5,18 @@
declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
-declare void @callee(i1)
+declare void @may_not_return(i1)
define i8 @load_global(i64 %idx) {
; CHECK-LABEL: define i8 @load_global(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
%load = load i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
@@ -27,11 +27,11 @@ define i8 @load_global(i64 %idx) {
define i1 @store_global(i64 %idx) {
; CHECK-LABEL: define i1 @store_global(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: ret i1 true
;
- %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
store i8 0, ptr %gep
%cmp = icmp ult i64 %idx, 5
ret i1 %cmp
@@ -40,13 +40,13 @@ define i1 @store_global(i64 %idx) {
define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
; CHECK-LABEL: define i8 @load_byval(
; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep = getelementptr nuw i8, ptr %p, i64 %idx
%load = load i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
@@ -59,7 +59,7 @@ define i8 @load_alloca(i64 %idx) {
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[ALLOC:%.*]] = alloca [5 x i8], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
@@ -67,7 +67,7 @@ define i8 @load_alloca(i64 %idx) {
;
%alloc = alloca [5 x i8], align 1
call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
- %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
%load = load i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
@@ -80,7 +80,7 @@ define i8 @load_malloc(i64 %idx) {
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 5)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
@@ -89,7 +89,7 @@ define i8 @load_malloc(i64 %idx) {
;
%alloc = call ptr @malloc(i64 5)
call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
- %gep = getelementptr inbounds i8, ptr %alloc, i64 %idx
+ %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
%load = load i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
@@ -101,13 +101,13 @@ define i8 @load_malloc(i64 %idx) {
define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
; CHECK-LABEL: define i32 @load_byval_i32(
; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i32 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep = getelementptr nuw i8, ptr %p, i64 %idx
%load = load i32, ptr %gep
%cmp = icmp ult i64 %idx, 7
%zext = zext i1 %cmp to i32
@@ -118,9 +118,9 @@ define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
-; CHECK-NEXT: call void @callee(i1 [[CMP1]])
+; CHECK-NEXT: call void @may_not_return(i1 [[CMP1]])
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: br label %[[NEXT:.*]]
; CHECK: [[NEXT]]:
@@ -128,9 +128,9 @@ define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
%cmp1 = icmp ult i64 %idx, 5
- call void @callee(i1 %cmp1) ; %cmp1 should not be simplified.
+ call void @may_not_return(i1 %cmp1) ; %cmp1 should not be simplified.
%load = load i8, ptr %gep
br label %next
@@ -162,16 +162,16 @@ define i8 @load_from_non_gep(ptr %p, i64 %idx) {
define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
; CHECK-LABEL: define i8 @load_global_multi_indices(
; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
-; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX1]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i8, ptr [[GEP1]], i64 [[IDX2]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep1 = getelementptr inbounds i8, ptr @g, i64 %idx1
- %gep2 = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+ %gep1 = getelementptr nuw i8, ptr @g, i64 %idx1
+ %gep2 = getelementptr nuw i8, ptr %gep1, i64 %idx2
%load = load i8, ptr %gep2
%cmp = icmp ult i64 %idx1, 5
%zext = zext i1 %cmp to i8
@@ -179,8 +179,8 @@ define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
ret i8 %add
}
-define i8 @load_global_without_inbounds(i64 %idx) {
-; CHECK-LABEL: define i8 @load_global_without_inbounds(
+define i8 @load_global_without_nuw(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_nuw(
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
@@ -200,14 +200,14 @@ define i8 @load_global_without_inbounds(i64 %idx) {
define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i32 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep = getelementptr nuw i8, ptr %p, i64 %idx
%load = load i32, ptr %gep
%cmp = icmp ult i64 %idx, 6
%zext = zext i1 %cmp to i32
@@ -218,14 +218,14 @@ define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
define i8 @load_global_volatile(i64 %idx) {
; CHECK-LABEL: define i8 @load_global_volatile(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
%load = load volatile i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
@@ -236,7 +236,7 @@ define i8 @load_global_volatile(i64 %idx) {
define i8 @load_global_vscale(i64 %idx) {
; CHECK-LABEL: define i8 @load_global_vscale(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
@@ -244,7 +244,7 @@ define i8 @load_global_vscale(i64 %idx) {
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr @g, i64 %idx
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
%load = load <vscale x 1 x i8>, ptr %gep
%ext = extractelement <vscale x 1 x i8> %load, i64 0
%cmp = icmp ult i64 %idx, 5
@@ -256,14 +256,14 @@ define i8 @load_global_vscale(i64 %idx) {
define i8 @load_from_null(i64 %idx) {
; CHECK-LABEL: define i8 @load_from_null(
; CHECK-SAME: i64 [[IDX:%.*]]) {
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr null, i64 [[IDX]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
; CHECK-NEXT: ret i8 [[ADD]]
;
- %gep = getelementptr inbounds i8, ptr null, i64 %idx
+ %gep = getelementptr nuw i8, ptr null, i64 %idx
%load = load i8, ptr %gep
%cmp = icmp ult i64 %idx, 5
%zext = zext i1 %cmp to i8
>From d28609be7d6a0862791a9d3a0e2d4175c8ff13f3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 31 Aug 2025 14:33:35 +0800
Subject: [PATCH 6/6] [ConstraintElim] Address review comments.
---
.../Scalar/ConstraintElimination.cpp | 7 +-
.../implied-by-bounded-memory-access.ll | 82 +++++++++++++++++++
2 files changed, 87 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 32eda0468bb64..ffa96826129be 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1128,6 +1128,9 @@ static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
ObjectSizeOpts Opts;
// Workaround for gep inbounds, ptr null, idx.
Opts.NullIsUnknownSize = true;
+ // Be conservative since we are not clear on whether an out of bounds access
+ // to the padding is UB or not.
+ Opts.RoundToAlign = true;
ObjectSizeOffsetVisitor Visitor(DL, &TLI, GEP.getContext(), Opts);
SizeOffsetAPInt Data = Visitor.compute(Offset.BasePtr);
if (!Data.bothKnown() || !Data.Offset.isZero())
@@ -1191,11 +1194,11 @@ void State::addInfoFor(BasicBlock &BB) {
};
if (auto *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->isSimple())
+ if (!LI->isVolatile())
AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
}
if (auto *SI = dyn_cast<StoreInst>(&I)) {
- if (SI->isSimple())
+ if (!SI->isVolatile())
AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
}
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
index ca7aa76866795..5338ac52db962 100644
--- a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -2,6 +2,8 @@
; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
@g = private unnamed_addr constant [5 x i8] c"test\00"
+@g_overaligned = private unnamed_addr constant [5 x i8] c"test\00", align 8
+@g_external = external global [5 x i8]
declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
@@ -24,6 +26,23 @@ define i8 @load_global(i64 %idx) {
ret i8 %add
}
+define i8 @load_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load atomic i8, ptr [[GEP]] unordered, align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
+ %load = load atomic i8, ptr %gep unordered, align 1
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
define i1 @store_global(i64 %idx) {
; CHECK-LABEL: define i1 @store_global(
; CHECK-SAME: i64 [[IDX:%.*]]) {
@@ -37,6 +56,19 @@ define i1 @store_global(i64 %idx) {
ret i1 %cmp
}
+define i1 @store_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store atomic i8 0, ptr [[GEP]] release, align 1
+; CHECK-NEXT: ret i1 true
+;
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
+ store atomic i8 0, ptr %gep release, align 1
+ %cmp = icmp ult i64 %idx, 5
+ ret i1 %cmp
+}
+
define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
; CHECK-LABEL: define i8 @load_byval(
; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
@@ -143,6 +175,42 @@ next:
; Negative tests.
+define i8 @load_global_overaligned(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_overaligned(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g_overaligned, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr nuw i8, ptr @g_overaligned, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
+define i8 @load_global_external(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_external(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g_external, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %gep = getelementptr nuw i8, ptr @g_external, i64 %idx
+ %load = load i8, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ %zext = zext i1 %cmp to i8
+ %add = add i8 %load, %zext
+ ret i8 %add
+}
+
define i8 @load_from_non_gep(ptr %p, i64 %idx) {
; CHECK-LABEL: define i8 @load_from_non_gep(
; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
@@ -233,6 +301,20 @@ define i8 @load_global_volatile(i64 %idx) {
ret i8 %add
}
+define i1 @store_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store volatile i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr nuw i8, ptr @g, i64 %idx
+ store volatile i8 0, ptr %gep
+ %cmp = icmp ult i64 %idx, 5
+ ret i1 %cmp
+}
+
define i8 @load_global_vscale(i64 %idx) {
; CHECK-LABEL: define i8 @load_global_vscale(
; CHECK-SAME: i64 [[IDX:%.*]]) {
More information about the llvm-commits
mailing list