[llvm] [InstCombine] Modify zero-indexed GEPs in place rather than cloning (PR #185053)

Drew Kersnar via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 09:17:04 PST 2026


https://github.com/dakersnar created https://github.com/llvm/llvm-project/pull/185053

When analyzing the operands of loads/stores, if we can guarantee that a GEP's index must be zero, it is better to modify the GEP in place so that all of its users can take advantage of the simplification, rather than cloning it for the one load/store user that triggered the transform.

Without this change, replaceGEPIdxWithZero clones the GEP for the triggering load/store, leaving the original variable-indexed GEP in place. Other users of that GEP (e.g., a constant-offset GEP feeding a second load) miss the simplification. The test case demonstrates this: unless the first load _modifies_ the GEP, the _second_ load remains dependent on both GEPs, and thus unnecessarily dependent on %idx. This missed simplification can cause issues for later passes such as LICM.

An alternative approach would be to add a version of this transform to visitGEP, but there is precedent for performing this kind of transform from visitLoad/visitStore; see simplifyNonNullOperand. And because the optimization is tied to the dereference that happens in the load/store, I think it reasonably fits here.

Alive2 proof: https://alive2.llvm.org/ce/z/-HZd9c

Alive2 counterexample showing why we cannot blindly modify the GEP in place without first checking a guarding condition: https://alive2.llvm.org/ce/z/dzKuc3


>From a27a15970bcd7dff67b34bf10ca6b4f035a01ba8 Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Thu, 5 Mar 2026 23:56:52 +0000
Subject: [PATCH 1/2] Pre-change test addition

---
 .../gep-replace-idx-zero-multi-use.ll         | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll

diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
new file mode 100644
index 0000000000000..64758acd1ead8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+%struct.pair = type { i64, i64 }
+
+declare void @init(ptr)
+
+; When a variable-indexed inbounds GEP into a single-element allocation is
+; dereferenced, the index is provably zero. If the GEP and the dereference
+; are in the same basic block, replaceGEPIdxWithZero should modify the GEP
+; in place so that all users benefit -- not just the triggering load/store.
+;
+; Here %gep feeds both a direct load and a constant-offset GEP (%off) used
+; by a second load. Both should resolve to constant offsets from %base.
+define i64 @gep_idx_zero_multi_use(i64 %idx) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT:    call void @init(ptr nonnull [[BASE]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[BASE]], align 16
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i64 [[SUM]]
+;
+  %base = alloca [1 x %struct.pair], align 16
+  call void @init(ptr %base)
+  %gep = getelementptr inbounds [1 x %struct.pair], ptr %base, i64 %idx
+  %load1 = load i64, ptr %gep, align 16
+  %off = getelementptr inbounds i8, ptr %gep, i64 8
+  %load2 = load i64, ptr %off, align 8
+  %sum = add i64 %load1, %load2
+  ret i64 %sum
+}
+
+; Same pattern but with a store as the triggering memory instruction.
+define i64 @gep_idx_zero_multi_use_store(i64 %idx, i64 %val) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use_store(
+; CHECK-SAME: i64 [[IDX:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[BASE]], align 16
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+  %base = alloca [1 x %struct.pair], align 16
+  %gep = getelementptr inbounds [1 x %struct.pair], ptr %base, i64 %idx
+  store i64 %val, ptr %gep, align 16
+  %off = getelementptr inbounds i8, ptr %gep, i64 8
+  %load = load i64, ptr %off, align 8
+  ret i64 %load
+}

>From 67f31f1545da2dec3253332f4cc130d2833a7a2b Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Fri, 6 Mar 2026 16:34:16 +0000
Subject: [PATCH 2/2] [InstCombine] Modify zero-indexed GEPs in place rather
 than cloning

---
 .../InstCombine/InstCombineLoadStoreAlloca.cpp        | 11 +++++++++++
 .../InstCombine/gep-replace-idx-zero-multi-use.ll     |  6 ++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 277f81245ade2..4ba6d37893d5c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -991,6 +991,17 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
     unsigned Idx;
     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+      // If the memory instruction is guaranteed to execute whenever the GEP
+      // does, the dereference proves the index is unconditionally zero.
+      // Modify the GEP in place so all users benefit.
+      if (GEPI->getParent() == MemI.getParent() &&
+          isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
+                                                     MemI.getIterator())) {
+        IC.replaceOperand(
+            *GEPI, Idx, ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+        IC.addToWorklist(GEPI);
+        return GEPI;
+      }
       Instruction *NewGEPI = GEPI->clone();
       NewGEPI->setOperand(Idx,
         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
index 64758acd1ead8..9f9d114e40e49 100644
--- a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -17,9 +17,8 @@ define i64 @gep_idx_zero_multi_use(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
 ; CHECK-NEXT:    call void @init(ptr nonnull [[BASE]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[BASE]], align 16
-; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[OFF]], align 8
 ; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[LOAD1]], [[LOAD2]]
 ; CHECK-NEXT:    ret i64 [[SUM]]
@@ -39,9 +38,8 @@ define i64 @gep_idx_zero_multi_use_store(i64 %idx, i64 %val) {
 ; CHECK-LABEL: define i64 @gep_idx_zero_multi_use_store(
 ; CHECK-SAME: i64 [[IDX:%.*]], i64 [[VAL:%.*]]) {
 ; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
 ; CHECK-NEXT:    store i64 [[VAL]], ptr [[BASE]], align 16
-; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[OFF]], align 8
 ; CHECK-NEXT:    ret i64 [[LOAD]]
 ;



More information about the llvm-commits mailing list