[llvm] [InstCombine] Modify zero-indexed GEPs in place rather than cloning (PR #185053)

Drew Kersnar via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 10:51:55 PST 2026


https://github.com/dakersnar updated https://github.com/llvm/llvm-project/pull/185053

>From a27a15970bcd7dff67b34bf10ca6b4f035a01ba8 Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Thu, 5 Mar 2026 23:56:52 +0000
Subject: [PATCH 1/4] Pre-change test addition

---
 .../gep-replace-idx-zero-multi-use.ll         | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll

diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
new file mode 100644
index 0000000000000..64758acd1ead8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+%struct.pair = type { i64, i64 }
+
+declare void @init(ptr)
+
+; When a variable-indexed inbounds GEP into a single-element allocation is
+; dereferenced, the index is provably zero. If the GEP and the dereference
+; are in the same basic block, replaceGEPIdxWithZero should modify the GEP
+; in place so that all users benefit -- not just the triggering load/store.
+;
+; Here %gep feeds both a direct load and a constant-offset GEP (%off) used
+; by a second load. Both should resolve to constant offsets from %base.
+define i64 @gep_idx_zero_multi_use(i64 %idx) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT:    call void @init(ptr nonnull [[BASE]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[BASE]], align 16
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i64 [[SUM]]
+;
+  %base = alloca [1 x %struct.pair], align 16
+  call void @init(ptr %base)
+  %gep = getelementptr inbounds [1 x %struct.pair], ptr %base, i64 %idx
+  %load1 = load i64, ptr %gep, align 16
+  %off = getelementptr inbounds i8, ptr %gep, i64 8
+  %load2 = load i64, ptr %off, align 8
+  %sum = add i64 %load1, %load2
+  ret i64 %sum
+}
+
+; Same pattern but with a store as the triggering memory instruction.
+define i64 @gep_idx_zero_multi_use_store(i64 %idx, i64 %val) {
+; CHECK-LABEL: define i64 @gep_idx_zero_multi_use_store(
+; CHECK-SAME: i64 [[IDX:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[BASE]], align 16
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[OFF]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+  %base = alloca [1 x %struct.pair], align 16
+  %gep = getelementptr inbounds [1 x %struct.pair], ptr %base, i64 %idx
+  store i64 %val, ptr %gep, align 16
+  %off = getelementptr inbounds i8, ptr %gep, i64 8
+  %load = load i64, ptr %off, align 8
+  ret i64 %load
+}

>From 67f31f1545da2dec3253332f4cc130d2833a7a2b Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Fri, 6 Mar 2026 16:34:16 +0000
Subject: [PATCH 2/4] [InstCombine] Modify zero-indexed GEPs in place rather
 than cloning

---
 .../InstCombine/InstCombineLoadStoreAlloca.cpp        | 11 +++++++++++
 .../InstCombine/gep-replace-idx-zero-multi-use.ll     |  6 ++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 277f81245ade2..4ba6d37893d5c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -991,6 +991,17 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
     unsigned Idx;
     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+      // If the memory instruction is guaranteed to execute whenever the GEP
+      // does, the dereference proves the index is unconditionally zero.
+      // Modify the GEP in place so all users benefit.
+      if (GEPI->getParent() == MemI.getParent() &&
+          isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
+                                                     MemI.getIterator())) {
+        IC.replaceOperand(
+            *GEPI, Idx, ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+        IC.addToWorklist(GEPI);
+        return GEPI;
+      }
       Instruction *NewGEPI = GEPI->clone();
       NewGEPI->setOperand(Idx,
         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
index 64758acd1ead8..9f9d114e40e49 100644
--- a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -17,9 +17,8 @@ define i64 @gep_idx_zero_multi_use(i64 %idx) {
 ; CHECK-SAME: i64 [[IDX:%.*]]) {
 ; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
 ; CHECK-NEXT:    call void @init(ptr nonnull [[BASE]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[BASE]], align 16
-; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[OFF]], align 8
 ; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[LOAD1]], [[LOAD2]]
 ; CHECK-NEXT:    ret i64 [[SUM]]
@@ -39,9 +38,8 @@ define i64 @gep_idx_zero_multi_use_store(i64 %idx, i64 %val) {
 ; CHECK-LABEL: define i64 @gep_idx_zero_multi_use_store(
 ; CHECK-SAME: i64 [[IDX:%.*]], i64 [[VAL:%.*]]) {
 ; CHECK-NEXT:    [[BASE:%.*]] = alloca [1 x [[STRUCT_PAIR:%.*]]], align 16
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x [[STRUCT_PAIR]]], ptr [[BASE]], i64 [[IDX]]
 ; CHECK-NEXT:    store i64 [[VAL]], ptr [[BASE]], align 16
-; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 8
+; CHECK-NEXT:    [[OFF:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 8
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[OFF]], align 8
 ; CHECK-NEXT:    ret i64 [[LOAD]]
 ;

>From fb0bb71e6a4d423d00b3cc2fa3371d171909314a Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Fri, 6 Mar 2026 18:39:06 +0000
Subject: [PATCH 3/4] Reviewer feedback, RAUW instead of modify in place

---
 .../InstCombine/InstCombineLoadStoreAlloca.cpp | 18 +++++++-----------
 .../gep-replace-idx-zero-multi-use.ll          |  4 ++--
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4ba6d37893d5c..e9e324bc61880 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -991,21 +991,17 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
     unsigned Idx;
     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
-      // If the memory instruction is guaranteed to execute whenever the GEP
-      // does, the dereference proves the index is unconditionally zero.
-      // Modify the GEP in place so all users benefit.
-      if (GEPI->getParent() == MemI.getParent() &&
-          isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
-                                                     MemI.getIterator())) {
-        IC.replaceOperand(
-            *GEPI, Idx, ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
-        IC.addToWorklist(GEPI);
-        return GEPI;
-      }
       Instruction *NewGEPI = GEPI->clone();
       NewGEPI->setOperand(Idx,
         ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
       IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
+      // If the memory instruction is guaranteed to execute whenever the GEP
+      // does, the dereference proves the index is unconditionally zero.
+      // Replace the GEP for all users so they all benefit.
+      if (GEPI->getParent() == MemI.getParent() &&
+          isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
+                                                     MemI.getIterator()))
+        IC.replaceInstUsesWith(*GEPI, NewGEPI);
       return NewGEPI;
     }
   }
diff --git a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
index 9f9d114e40e49..6f9c1ee3ac6c0 100644
--- a/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
+++ b/llvm/test/Transforms/InstCombine/gep-replace-idx-zero-multi-use.ll
@@ -7,8 +7,8 @@ declare void @init(ptr)
 
 ; When a variable-indexed inbounds GEP into a single-element allocation is
 ; dereferenced, the index is provably zero. If the GEP and the dereference
-; are in the same basic block, replaceGEPIdxWithZero should modify the GEP
-; in place so that all users benefit -- not just the triggering load/store.
+; are in the same basic block, replaceGEPIdxWithZero should replace the GEP
+; for all users -- not just the triggering load/store.
 ;
 ; Here %gep feeds both a direct load and a constant-offset GEP (%off) used
 ; by a second load. Both should resolve to constant offsets from %base.

>From 755968bc806d9e3fc7ee26eca46de7901a7f8f51 Mon Sep 17 00:00:00 2001
From: Drew Kersnar <dkersnar at nvidia.com>
Date: Fri, 6 Mar 2026 18:51:28 +0000
Subject: [PATCH 4/4] eraseInst is needed to prevent LLVM ERROR: Instruction
 Combining on gep_idx_zero_multi_use did not reach a fixpoint after 1
 iterations.

---
 .../lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e9e324bc61880..576a554794111 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1000,8 +1000,10 @@ static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
       // Replace the GEP for all users so they all benefit.
       if (GEPI->getParent() == MemI.getParent() &&
           isGuaranteedToTransferExecutionToSuccessor(GEPI->getIterator(),
-                                                     MemI.getIterator()))
+                                                     MemI.getIterator())) {
         IC.replaceInstUsesWith(*GEPI, NewGEPI);
+        IC.eraseInstFromFunction(*GEPI);
+      }
       return NewGEPI;
     }
   }



More information about the llvm-commits mailing list