[llvm] Allow unfolding gep(phi(gep,gep)) (PR #178126)
Theodoros Theodoridis via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 26 23:11:49 PST 2026
https://github.com/thetheodor created https://github.com/llvm/llvm-project/pull/178126
Unfolding a Phi node with a GEP argument through an outer GEP is currently not allowed. The reason is that the inner GEP might point back to the PHI node itself (e.g., in case of a loop) and form a cycle, which would be infinitely unfolded. This limitation prevents SROA from folding multiple GEPs even in simple if-else cases. To lift this limitation we check whether the underlying pointer of the inner GEP points back to the PHI node; if it does not, we allow the unfolding:
gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
=> phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
>From 837f5fec18c33a6c3f5b70824326c65ad1d1f4cf Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Thu, 8 Jan 2026 15:43:54 +0000
Subject: [PATCH] Allow unfolding gep(phi(gep,gep))
Unfolding a Phi node with a GEP argument through an outer GEP is
currently not allowed. The reason is that the inner GEP might point back
to the PHI node itself (e.g., in case of a loop) and form a cycle, which
would be infinitely unfolded. This limitation prevents SROA from folding
multiple GEPs even in simple if-else cases. To lift this limitation we
check whether the underlying pointer of the inner GEP points back to
the PHI node; if it does not, we allow the unfolding:
gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
=> phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
---
llvm/lib/Transforms/Scalar/SROA.cpp | 28 +++--
llvm/test/Transforms/SROA/phi-gep.ll | 155 +++++++++++++++++++++++++++
2 files changed, 176 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 83eabdae3db7f..57a30c5bf6340 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4448,16 +4448,23 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// => phi ((gep ptr1, idx), (gep ptr2, idx))
// and gep ptr, (phi idx1, idx2)
// => phi ((gep ptr, idx1), (gep ptr, idx2))
+ //
+ // Also handles GEPs as PHI incoming values when non-cyclic:
+ // gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
+ // => phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
- // To prevent infinitely expanding recursive phis, bail if the GEP pointer
- // operand (looking through the phi if it is the phi we want to unfold) is
- // an instruction besides a static alloca.
+ // To prevent infinitely expanding recursive phis, only allow GEP pointer
+ // operands (looking through the phi if it is the phi we want to unfold)
+ // that are static allocas or GEPs that do not point back
+ // to the PHI node itself (i.e. they don't form a cycle).
PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
- auto IsInvalidPointerOperand = [](Value *V) {
+ auto IsInvalidPointerOperand = [Phi](Value *V) {
if (!isa<Instruction>(V))
return false;
if (auto *AI = dyn_cast<AllocaInst>(V))
return !AI->isStaticAlloca();
+ if (isa<GetElementPtrInst>(V) && Phi && getUnderlyingObject(V, 0) != Phi)
+ return false;
return true;
};
if (Phi) {
@@ -4507,9 +4514,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Phi->getName() + ".sroa.phi");
Type *SourceTy = GEPI.getSourceElementType();
- // We only handle arguments, constants, and static allocas here, so we can
- // insert GEPs at the end of the entry block.
- IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
Value *Op = Phi->getIncomingValue(I);
BasicBlock *BB = Phi->getIncomingBlock(I);
@@ -4518,6 +4522,16 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
NewGEP = NewPhi->getIncomingValue(NI);
} else {
SmallVector<Value *> NewOps = GetNewOps(Op);
+
+ // For arguments, constants, and static allocas, we insert GEPs at the
+ // end of the entry block. For GEP incoming values, we insert right
+ // after the GEP to ensure proper dominance.
+ if (auto *OpGEP = dyn_cast<GetElementPtrInst>(NewOps[0])) {
+ IRB.SetInsertPoint(OpGEP->getNextNode());
+ } else {
+ IRB.SetInsertPoint(
+ GEPI.getFunction()->getEntryBlock().getTerminator());
+ }
NewGEP =
IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index 45c3bbdeb3897..e62aa2afaf44d 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -705,6 +705,161 @@ bb4:
ret i32 %load
}
+define i32 @test_gep_phi_gep(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ 1000, [[THEN]] ], [ 5000, [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[PHI_SROA_PHI_SROA_SPECULATED]]
+;
+entry:
+ %a = alloca [2 x i32], align 4
+ %b = alloca [2 x i32], align 4
+ %a1 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
+ %b1 = getelementptr inbounds [2 x i32], ptr %b, i64 0, i64 1
+ store i32 1000, ptr %a1, align 4
+ store i32 5000, ptr %b1, align 4
+ %gep_a = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 0
+ %gep_b = getelementptr inbounds [2 x i32], ptr %b, i64 0, i64 0
+ br i1 %cond, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %gep_a, %then ], [ %gep_b, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_gep_cycle(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep_cycle(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: [[A0:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 0, i64 0
+; CHECK-NEXT: store i32 42, ptr [[A0]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A0]], [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP]] = getelementptr inbounds i32, ptr [[PHI]], i64 1
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [4 x i32], align 4
+ %a0 = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 0
+ store i32 42, ptr %a0, align 4
+ br label %loop
+
+loop:
+ %phi = phi ptr [ %a0, %entry ], [ %gep, %loop ]
+ %gep = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %phi, align 4
+ %done = icmp eq i32 %val, 0
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_gep_non_alloca_operands(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep_non_alloca_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: [[B:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: call void @use(ptr [[A]])
+; CHECK-NEXT: call void @use(ptr [[B]])
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_A]], i64 1
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [4 x i32], ptr [[B]], i64 0, i64 1
+; CHECK-NEXT: [[PHI_SROA_GEP1:%.*]] = getelementptr inbounds i32, ptr [[GEP_B]], i64 1
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI:%.*]] = phi ptr [ [[PHI_SROA_GEP]], [[THEN]] ], [ [[PHI_SROA_GEP1]], [[ELSE]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[GEP_A]], [[THEN]] ], [ [[GEP_B]], [[ELSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI_SROA_PHI]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [4 x i32], align 4
+ %b = alloca [4 x i32], align 4
+ call void @use(ptr %a)
+ call void @use(ptr %b)
+ br i1 %cond, label %then, label %else
+
+then:
+ %gep_a = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 1
+ br label %merge
+
+else:
+ %gep_b = getelementptr inbounds [4 x i32], ptr %b, i64 0, i64 1
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %gep_a, %then ], [ %gep_b, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_alloca_and_non_alloca_operands(i1 %cond, ptr %arg) {
+; CHECK-LABEL: @test_gep_phi_alloca_and_non_alloca_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4
+; CHECK-NEXT: call void @use(ptr [[A]])
+; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP_ARG:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 0
+; CHECK-NEXT: [[PHI_SROA_GEP1:%.*]] = getelementptr inbounds i32, ptr [[GEP_ARG]], i64 1
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI:%.*]] = phi ptr [ [[PHI_SROA_GEP]], [[THEN]] ], [ [[PHI_SROA_GEP1]], [[ELSE]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[THEN]] ], [ [[GEP_ARG]], [[ELSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI_SROA_PHI]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [2 x i32], align 4
+ store i32 42, ptr %a, align 4
+ call void @use(ptr %a)
+ br i1 %cond, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ %gep_arg = getelementptr inbounds i32, ptr %arg, i64 0
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %a, %then ], [ %gep_arg, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+declare void @use(ptr)
+
define i64 @test_unfold_phi_duplicate_phi_entry(ptr %arg, i8 %arg1, i1 %arg2) {
; CHECK-LABEL: @test_unfold_phi_duplicate_phi_entry(
; CHECK-NEXT: bb:
More information about the llvm-commits
mailing list