[llvm] Allow unfolding gep(phi(gep,gep)) (PR #178126)
Theodoros Theodoridis via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 26 23:11:49 PST 2026
https://github.com/thetheodor created https://github.com/llvm/llvm-project/pull/178126
Unfolding a Phi node with a GEP argument through an outer GEP is currently not allowed. The reason is that the inner GEP might point back to the PHI node itself (e.g., in case of a loop) and form a cycle, which would be infinitely unfolded. This limitation prevents SROA from folding multiple GEPs even in simple if-else cases. To lift this limitation we check whether the underlying pointer of the inner GEP points back to the PHI node; if it does not, we allow the unfolding:
gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
=> phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
>From 837f5fec18c33a6c3f5b70824326c65ad1d1f4cf Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Thu, 8 Jan 2026 15:43:54 +0000
Subject: [PATCH] Allow unfolding gep(phi(gep,gep))
Unfolding a Phi node with a GEP argument through an outer GEP is
currently not allowed. The reason is that the inner GEP might point back
to the PHI node itself (e.g., in case of a loop) and form a cycle, which
would be infinitely unfolded. This limitation prevents SROA from folding
multiple GEPs even in simple if-else cases. To lift this limitation we
check whether the underlying pointer of the inner GEP points back to
the PHI node; if it does not, we allow the unfolding:
gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
=> phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
---
llvm/lib/Transforms/Scalar/SROA.cpp | 28 +++--
llvm/test/Transforms/SROA/phi-gep.ll | 155 +++++++++++++++++++++++++++
2 files changed, 176 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 83eabdae3db7f..57a30c5bf6340 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4448,16 +4448,23 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// => phi ((gep ptr1, idx), (gep ptr2, idx))
// and gep ptr, (phi idx1, idx2)
// => phi ((gep ptr, idx1), (gep ptr, idx2))
+ //
+ // Also handles GEPs as PHI incoming values when non-cyclic:
+ // gep (phi (gep ptr1, idx1), (gep ptr2, idx2)), idx
+ // => phi ((gep (gep ptr1, idx1), idx), (gep (gep ptr2, idx2), idx))
bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
- // To prevent infinitely expanding recursive phis, bail if the GEP pointer
- // operand (looking through the phi if it is the phi we want to unfold) is
- // an instruction besides a static alloca.
+ // To prevent infinitely expanding recursive phis, only allow GEP pointer
+ // operands (looking through the phi if it is the phi we want to unfold)
+ // that are static allocas or GEPs that do not point back
+ // to the PHI node itself (i.e. they don't form a cycle).
PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
- auto IsInvalidPointerOperand = [](Value *V) {
+ auto IsInvalidPointerOperand = [Phi](Value *V) {
if (!isa<Instruction>(V))
return false;
if (auto *AI = dyn_cast<AllocaInst>(V))
return !AI->isStaticAlloca();
+ if (isa<GetElementPtrInst>(V) && Phi && getUnderlyingObject(V, 0) != Phi)
+ return false;
return true;
};
if (Phi) {
@@ -4507,9 +4514,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Phi->getName() + ".sroa.phi");
Type *SourceTy = GEPI.getSourceElementType();
- // We only handle arguments, constants, and static allocas here, so we can
- // insert GEPs at the end of the entry block.
- IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
Value *Op = Phi->getIncomingValue(I);
BasicBlock *BB = Phi->getIncomingBlock(I);
@@ -4518,6 +4522,16 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
NewGEP = NewPhi->getIncomingValue(NI);
} else {
SmallVector<Value *> NewOps = GetNewOps(Op);
+
+ // For arguments, constants, and static allocas, we insert GEPs at the
+ // end of the entry block. For GEP incoming values, we insert right
+ // after the GEP to ensure proper dominance.
+ if (auto *OpGEP = dyn_cast<GetElementPtrInst>(NewOps[0])) {
+ IRB.SetInsertPoint(OpGEP->getNextNode());
+ } else {
+ IRB.SetInsertPoint(
+ GEPI.getFunction()->getEntryBlock().getTerminator());
+ }
NewGEP =
IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index 45c3bbdeb3897..e62aa2afaf44d 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -705,6 +705,161 @@ bb4:
ret i32 %load
}
+define i32 @test_gep_phi_gep(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ 1000, [[THEN]] ], [ 5000, [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[PHI_SROA_PHI_SROA_SPECULATED]]
+;
+entry:
+ %a = alloca [2 x i32], align 4
+ %b = alloca [2 x i32], align 4
+ %a1 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
+ %b1 = getelementptr inbounds [2 x i32], ptr %b, i64 0, i64 1
+ store i32 1000, ptr %a1, align 4
+ store i32 5000, ptr %b1, align 4
+ %gep_a = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 0
+ %gep_b = getelementptr inbounds [2 x i32], ptr %b, i64 0, i64 0
+ br i1 %cond, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %gep_a, %then ], [ %gep_b, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_gep_cycle(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep_cycle(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: [[A0:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 0, i64 0
+; CHECK-NEXT: store i32 42, ptr [[A0]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A0]], [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP]] = getelementptr inbounds i32, ptr [[PHI]], i64 1
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [4 x i32], align 4
+ %a0 = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 0
+ store i32 42, ptr %a0, align 4
+ br label %loop
+
+loop:
+ %phi = phi ptr [ %a0, %entry ], [ %gep, %loop ]
+ %gep = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %phi, align 4
+ %done = icmp eq i32 %val, 0
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_gep_non_alloca_operands(i1 %cond) {
+; CHECK-LABEL: @test_gep_phi_gep_non_alloca_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: [[B:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT: call void @use(ptr [[A]])
+; CHECK-NEXT: call void @use(ptr [[B]])
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_A]], i64 1
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [4 x i32], ptr [[B]], i64 0, i64 1
+; CHECK-NEXT: [[PHI_SROA_GEP1:%.*]] = getelementptr inbounds i32, ptr [[GEP_B]], i64 1
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI:%.*]] = phi ptr [ [[PHI_SROA_GEP]], [[THEN]] ], [ [[PHI_SROA_GEP1]], [[ELSE]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[GEP_A]], [[THEN]] ], [ [[GEP_B]], [[ELSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI_SROA_PHI]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [4 x i32], align 4
+ %b = alloca [4 x i32], align 4
+ call void @use(ptr %a)
+ call void @use(ptr %b)
+ br i1 %cond, label %then, label %else
+
+then:
+ %gep_a = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 1
+ br label %merge
+
+else:
+ %gep_b = getelementptr inbounds [4 x i32], ptr %b, i64 0, i64 1
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %gep_a, %then ], [ %gep_b, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+define i32 @test_gep_phi_alloca_and_non_alloca_operands(i1 %cond, ptr %arg) {
+; CHECK-LABEL: @test_gep_phi_alloca_and_non_alloca_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT: store i32 42, ptr [[A]], align 4
+; CHECK-NEXT: call void @use(ptr [[A]])
+; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP_ARG:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 0
+; CHECK-NEXT: [[PHI_SROA_GEP1:%.*]] = getelementptr inbounds i32, ptr [[GEP_ARG]], i64 1
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[PHI_SROA_PHI:%.*]] = phi ptr [ [[PHI_SROA_GEP]], [[THEN]] ], [ [[PHI_SROA_GEP1]], [[ELSE]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[THEN]] ], [ [[GEP_ARG]], [[ELSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PHI_SROA_PHI]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %a = alloca [2 x i32], align 4
+ store i32 42, ptr %a, align 4
+ call void @use(ptr %a)
+ br i1 %cond, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ %gep_arg = getelementptr inbounds i32, ptr %arg, i64 0
+ br label %merge
+
+merge:
+ %phi = phi ptr [ %a, %then ], [ %gep_arg, %else ]
+ %elem1 = getelementptr inbounds i32, ptr %phi, i64 1
+ %val = load i32, ptr %elem1, align 4
+ ret i32 %val
+}
+
+declare void @use(ptr)
+
define i64 @test_unfold_phi_duplicate_phi_entry(ptr %arg, i8 %arg1, i1 %arg2) {
; CHECK-LABEL: @test_unfold_phi_duplicate_phi_entry(
; CHECK-NEXT: bb:
More information about the llvm-commits
mailing list