[llvm] a4ef581 - [InstCombine] RAUW for proven zero-indexed GEPs rather than cloning for a specific user (#185053)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 18 07:47:57 PDT 2026
Author: Drew Kersnar
Date: 2026-03-18T09:47:51-05:00
New Revision: a4ef581c714b939ee692d1cff212c48a9ae3a000
URL: https://github.com/llvm/llvm-project/commit/a4ef581c714b939ee692d1cff212c48a9ae3a000
DIFF: https://github.com/llvm/llvm-project/commit/a4ef581c714b939ee692d1cff212c48a9ae3a000.diff
LOG: [InstCombine] RAUW for proven zero-indexed GEPs rather than cloning for a specific user (#185053)
When analyzing operands of loads/stores, if we can guarantee that a GEP
is always zero-indexed, it is better to modify the GEP such that other
users can take advantage of the simplification, rather than just cloning
it for one specific load/store user. Edit: implementation changed to
call replaceInstUsesWith instead of modifying in place.
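For concreteness, the underlying fact (a minimal IR sketch in the shape
of the tests below; not verbatim from the patch):

  %base = alloca [1 x %struct.pair], align 16    ; 16-byte allocation
  %gep  = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
  ; inbounds permits only %idx == 0 or %idx == 1 (one-past-the-end);
  ; dereferencing %gep rules out the one-past-the-end case, so %idx == 0:
  %v    = load i64, ptr %gep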
Without this change, replaceGEPIdxWithZero clones the GEP for the
triggering load/store, leaving the original variable-indexed GEP in
place. Other users of that GEP (e.g., a constant-offset GEP feeding a
second load) miss the simplification. The added testcase demonstrates
this: unless simplifying the first load rewrites the GEP itself, the
second load remains dependent on both GEPs, and thus unnecessarily
dependent on %idx. That missed simplification can in turn block later
passes such as LICM.
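Concretely, in the shape of the added testcase (a sketch; the full test
with autogenerated checks is below):

  ; before: %off still chains through the variable-indexed %gep, so
  ; %load2 depends on %idx even after %load1 is simplified
  %gep   = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
  %load1 = load i64, ptr %gep, align 16
  %off   = getelementptr inbounds i8, ptr %gep, i64 8
  %load2 = load i64, ptr %off, align 8

  ; after replacing %gep for all users: neither load depends on %idx
  %load1 = load i64, ptr %base, align 16
  %off   = getelementptr inbounds nuw i8, ptr %base, i64 8
  %load2 = load i64, ptr %off, align 8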
An alternative would be to add a version of this transform to visitGEP,
but there is precedent for doing it in visitLoad/visitStore (see
simplifyNonNullOperand), and because the optimization is tied to the
dereference performed by the load/store, it fits reasonably here.
Alive2 proof: https://alive2.llvm.org/ce/z/-HZd9c
Alive2 counterexample showing why we cannot blindly modify the GEP in
place without a guarding condition:
https://alive2.llvm.org/ce/z/dzKuc3
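The counterexample reduces to a GEP whose dereference is not guaranteed
to execute; sketched here in the shape of the negative tests below:

  %gep = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
  br i1 %cond, label %then, label %else
  then:
    %v = load i64, ptr %gep        ; only this path dereferences %gep
    ret i64 %v
  else:
    call void @use_ptr(ptr %gep)   ; %idx may validly be 1 here
    ret i64 0                      ; (one-past-the-end, never loaded)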
Added:
llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 769ad433650b4..7d52252a53425 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -995,6 +995,15 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
NewGEPI->setOperand(Idx,
ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
+ // If the memory instruction is guaranteed to execute whenever the GEP
+ // does, the dereference proves the index is unconditionally zero.
+ // Replace the GEP for all users so they all benefit.
+ if (GEPI->getParent() == MemI.getParent() &&
+ isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
+ MemI.getIterator())) {
+ IC.replaceInstUsesWith(*GEPI, NewGEPI);
+ IC.eraseInstFromFunction(*GEPI);
+ }
return NewGEPI;
}
}
diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
new file mode 100644
index 0000000000000..fa54f255c600e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+%struct.pair = type { i64, i64 }
+
+declare void @init(ptr)
+declare void @use_ptr(ptr)
+
+; When a variable-indexed inbounds GEP into a single-element allocation is
+; dereferenced, the index is provably zero. If the GEP and the dereference
+; are in the same basic block, replaceGEPIdxWithZero should replace the GEP
+; for all users -- not just the triggering load/store.
+;
+; Here %gep feeds both a direct load and a constant-offset GEP (%off) used
+; by a second load. Both should resolve to constant offsets from %base.
+define i64 @gep_idx_zero_multi_use(i64 %idx) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT: call void @init(ptr nonnull [[BASE]])
+; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[BASE]], align 16
+; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
+; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT: [[SUM:%.*]] = add i64 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: ret i64 [[SUM]]
+;
+ %base = alloca [1 x %struct.pair], align 16
+ call void @init(ptr %base)
+ %gep = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
+ %load1 = load i64, ptr %gep, align 16
+ %off = getelementptr inbounds i8, ptr %gep, i64 8
+ %load2 = load i64, ptr %off, align 8
+ %sum = add i64 %load1, %load2
+ ret i64 %sum
+}
+
+; Same pattern but with a store as the triggering memory instruction.
+define i64 @gep_idx_zero_multi_use_store(i64 %idx, i64 %val) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use_store(
+; CHECK-SAME: i64 [[IDX:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT: [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT: store i64 [[VAL]], ptr [[BASE]], align 16
+; CHECK-NEXT: [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+ %base = alloca [1 x %struct.pair], align 16
+ %gep = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
+ store i64 %val, ptr %gep, align 16
+ %off = getelementptr inbounds i8, ptr %gep, i64 8
+ %load = load i64, ptr %off, align 8
+ ret i64 %load
+}
+
+; -------------------------- NEGATIVE CASES --------------------------
+
+; When the GEP and the dereference are in different basic blocks, the
+; dereference does not prove the index is zero on all paths (the GEP
+; could be reached without being dereferenced). replaceGEPIdxWithZero
+; must NOT replace the GEP for all users; only the triggering load
+; gets a zero-indexed clone. Here, the else branch passes %gep to a
+; call where %idx could validly be 1 (one-past-the-end, not
+; dereferenced), so the original GEP must be preserved.
+define i64 @gep_idx_zero_different_bb(i64 %idx, i1 %cond) {
+; CHECK-LABEL: define i64 @gep_idx_zero_different_bb(
+; CHECK-SAME: i64 [[IDX:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT: call void @init(ptr nonnull [[BASE]])
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[BASE]], align 16
+; CHECK-NEXT: ret i64 [[LOAD]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [16 x i8], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT: call void @use_ptr(ptr nonnull [[GEP]])
+; CHECK-NEXT: ret i64 0
+;
+ %base = alloca [1 x %struct.pair], align 16
+ call void @init(ptr %base)
+ %gep = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
+ br i1 %cond, label %then, label %else
+
+then:
+ %load = load i64, ptr %gep, align 16
+ ret i64 %load
+
+else:
+ call void @use_ptr(ptr %gep)
+ ret i64 0
+}
+
+; Same basic block, but a potentially-throwing call between the GEP and
+; the load prevents isGuaranteedToTransferExecutionToSuccessor from
+; proving the load always executes when the GEP does. @nounwind_use is
+; nounwind+willreturn so it doesn't block the transfer check; only
+; @may_throw does. Without @may_throw, RAUW would fire and @nounwind_use
+; would receive the zero-indexed pointer. With @may_throw, the GEP must
+; be preserved because @nounwind_use could observe a validly non-zero
+; index on a path where @may_throw throws before the load is reached.
+declare void @may_throw()
+declare void @nounwind_use(ptr) nounwind willreturn
+
+define i64 @gep_idx_zero_may_throw(i64 %idx) {
+; CHECK-LABEL: define i64 @gep_idx_zero_may_throw(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT: call void @init(ptr nonnull [[BASE]])
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [16 x i8], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT: call void @nounwind_use(ptr nonnull [[GEP]])
+; CHECK-NEXT: call void @may_throw()
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[BASE]], align 16
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+ %base = alloca [1 x %struct.pair], align 16
+ call void @init(ptr %base)
+ %gep = getelementptr inbounds [16 x i8], ptr %base, i64 %idx
+ call void @nounwind_use(ptr %gep)
+ call void @may_throw()
+ %load = load i64, ptr %gep, align 16
+ ret i64 %load
+}