[clang] [llvm] [InstCombine] Preserve multi-dimensional array structure in GEP optimization (PR #176414)
Akash Dutta via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 16 11:01:37 PST 2026
https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/176414
>From 02783dd2e1679984611cc62c588e4b8e25b91223 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 16 Jan 2026 09:14:20 -0600
Subject: [PATCH 1/2] Preserve multi-dimensional array structure in GEP
optimization
---
.../InstCombine/InstructionCombining.cpp | 96 +++++++++++--------
.../InstCombine/canonicalize-gep-constglob.ll | 4 +-
llvm/test/Transforms/InstCombine/strcmp-3.ll | 6 +-
llvm/test/Transforms/InstCombine/strlen-8.ll | 10 +-
4 files changed, 67 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d24db3de8f7b3..0daf76227418b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2921,12 +2921,21 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
Indices.append(GEP.op_begin() + 2, GEP.op_end());
// Don't create GEPs with more than one non-zero index.
- unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
- auto *C = dyn_cast<Constant>(Idx);
- return !C || !C->isNullValue();
- });
- if (NumNonZeroIndices > 1)
- return nullptr;
+ // Exception (motivated by AMDGPU, but applied on all targets since no target
+ // check is made here): preserve multi-dimensional array structure for better
+ // backend optimization when the source element type is an array of arrays.
+ Type *GEPSrcElemTy = GEP.getSourceElementType();
+ bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() &&
+ GEPSrcElemTy->getArrayElementType()->isArrayTy();
+
+ if (!IsMultiDimArray_Strip) {
+ unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
+ auto *C = dyn_cast<Constant>(Idx);
+ return !C || !C->isNullValue();
+ });
+ if (NumNonZeroIndices > 1)
+ return nullptr;
+ }
return replaceInstUsesWith(
GEP, Builder.CreateGEP(
@@ -3364,17 +3373,24 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
drop_end(Indices), "", GEP.getNoWrapFlags()));
}
- // Strip leading zero indices.
- auto *FirstIdx = dyn_cast<Constant>(Indices.front());
- if (FirstIdx && FirstIdx->isNullValue() &&
- !FirstIdx->getType()->isVectorTy()) {
- gep_type_iterator GTI = gep_type_begin(GEP);
- ++GTI;
- if (!GTI.isStruct())
- return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
- GEP.getPointerOperand(),
- drop_begin(Indices), "",
- GEP.getNoWrapFlags()));
+ // Strip leading zero indices (except for multi-dimensional arrays).
+ // Preserve structure for better backend optimization.
+ Type *GEPSrcElemTy = GEP.getSourceElementType();
+ bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() &&
+ GEPSrcElemTy->getArrayElementType()->isArrayTy();
+
+ if (!IsMultiDimArray_Strip) {
+ auto *FirstIdx = dyn_cast<Constant>(Indices.front());
+ if (FirstIdx && FirstIdx->isNullValue() &&
+ !FirstIdx->getType()->isVectorTy()) {
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ ++GTI;
+ if (!GTI.isStruct())
+ return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
+ GEP.getPointerOperand(),
+ drop_begin(Indices), "",
+ GEP.getNoWrapFlags()));
+ }
}
// Scalarize vector operands; prefer splat-of-gep.as canonical form.
@@ -3403,29 +3419,33 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return replaceInstUsesWith(GEP, Res);
}
- bool SeenNonZeroIndex = false;
- for (auto [IdxNum, Idx] : enumerate(Indices)) {
- auto *C = dyn_cast<Constant>(Idx);
- if (C && C->isNullValue())
- continue;
+ // GEP has multiple non-zero indices: Split it (except for multi-dim arrays).
+ // Preserve structure for better backend optimization.
+ if (!IsMultiDimArray_Strip) {
+ bool SeenNonZeroIndex = false;
+ for (auto [IdxNum, Idx] : enumerate(Indices)) {
+ auto *C = dyn_cast<Constant>(Idx);
+ if (C && C->isNullValue())
+ continue;
- if (!SeenNonZeroIndex) {
- SeenNonZeroIndex = true;
- continue;
- }
+ if (!SeenNonZeroIndex) {
+ SeenNonZeroIndex = true;
+ continue;
+ }
- // GEP has multiple non-zero indices: Split it.
- ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
- Value *FrontGEP =
- Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
- GEP.getName() + ".split", GEP.getNoWrapFlags());
-
- SmallVector<Value *> BackIndices;
- BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
- append_range(BackIndices, drop_begin(Indices, IdxNum));
- return GetElementPtrInst::Create(
- GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
- BackIndices, GEP.getNoWrapFlags());
+ // GEP has multiple non-zero indices: Split it.
+ ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
+ Value *FrontGEP =
+ Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
+ GEP.getName() + ".split", GEP.getNoWrapFlags());
+
+ SmallVector<Value *> BackIndices;
+ BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
+ append_range(BackIndices, drop_begin(Indices, IdxNum));
+ return GetElementPtrInst::Create(
+ GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
+ BackIndices, GEP.getNoWrapFlags());
+ }
}
// Check to see if the inputs to the PHI node are getelementptr instructions.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 129da3f9110ad..6d238ae497d07 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -35,9 +35,7 @@ define ptr @xzy(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: define ptr @xzy(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_SPLIT:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 [[X]]
-; CHECK-NEXT: [[GEP_SPLIT1:%.*]] = getelementptr inbounds [10 x i32], ptr [[GEP_SPLIT]], i64 [[Z]]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_SPLIT1]], i64 [[Y]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 0, i64 [[X]], i64 [[Z]], i64 [[Y]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/strcmp-3.ll b/llvm/test/Transforms/InstCombine/strcmp-3.ll
index 72da736a0a9fd..2c4012b96e188 100644
--- a/llvm/test/Transforms/InstCombine/strcmp-3.ll
+++ b/llvm/test/Transforms/InstCombine/strcmp-3.ll
@@ -25,7 +25,7 @@ define i32 @fold_strcmp_a5i0_a5i1_to_0() {
define i32 @call_strcmp_a5i0_a5iI(i64 %I) {
; CHECK-LABEL: @call_strcmp_a5i0_a5iI(
-; CHECK-NEXT: [[Q:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 [[I:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]])
; CHECK-NEXT: ret i32 [[CMP]]
;
@@ -40,7 +40,7 @@ define i32 @call_strcmp_a5i0_a5iI(i64 %I) {
define i32 @call_strcmp_a5iI_a5i0(i64 %I) {
; CHECK-LABEL: @call_strcmp_a5iI_a5i0(
-; CHECK-NEXT: [[P:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]]
+; CHECK-NEXT: [[P:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 [[I:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(1) [[P]], ptr noundef nonnull dereferenceable(4) @a5)
; CHECK-NEXT: ret i32 [[CMP]]
;
@@ -68,7 +68,7 @@ define i32 @fold_strcmp_a5i0_a5i1_p1_to_0() {
define i32 @call_strcmp_a5i0_a5i1_pI(i64 %I) {
; CHECK-LABEL: @call_strcmp_a5i0_a5i1_pI(
-; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5, i64 4), i64 [[I:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 1, i64 [[I:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]])
; CHECK-NEXT: ret i32 [[CMP]]
;
diff --git a/llvm/test/Transforms/InstCombine/strlen-8.ll b/llvm/test/Transforms/InstCombine/strlen-8.ll
index af12198069803..b4334ddd8f1ac 100644
--- a/llvm/test/Transforms/InstCombine/strlen-8.ll
+++ b/llvm/test/Transforms/InstCombine/strlen-8.ll
@@ -16,7 +16,7 @@ declare i64 @strlen(ptr)
define i64 @fold_a5_4_i0_pI(i64 %I) {
; CHECK-LABEL: @fold_a5_4_i0_pI(
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr @a5_4, i64 [[I:%.*]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 0, i64 [[I:%.*]]
; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]])
; CHECK-NEXT: ret i64 [[LEN]]
;
@@ -30,7 +30,7 @@ define i64 @fold_a5_4_i0_pI(i64 %I) {
define i64 @fold_a5_4_i1_pI(i64 %I) {
; CHECK-LABEL: @fold_a5_4_i1_pI(
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 4), i64 [[I:%.*]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 1, i64 [[I:%.*]]
; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]])
; CHECK-NEXT: ret i64 [[LEN]]
;
@@ -44,7 +44,7 @@ define i64 @fold_a5_4_i1_pI(i64 %I) {
define i64 @fold_a5_4_i2_pI(i64 %I) {
; CHECK-LABEL: @fold_a5_4_i2_pI(
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 8), i64 [[I:%.*]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 2, i64 [[I:%.*]]
; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]])
; CHECK-NEXT: ret i64 [[LEN]]
;
@@ -58,7 +58,7 @@ define i64 @fold_a5_4_i2_pI(i64 %I) {
define i64 @fold_a5_4_i3_pI_to_0(i64 %I) {
; CHECK-LABEL: @fold_a5_4_i3_pI_to_0(
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 12), i64 [[I:%.*]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 3, i64 [[I:%.*]]
; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]])
; CHECK-NEXT: ret i64 [[LEN]]
;
@@ -72,7 +72,7 @@ define i64 @fold_a5_4_i3_pI_to_0(i64 %I) {
define i64 @fold_a5_4_i4_pI_to_0(i64 %I) {
; CHECK-LABEL: @fold_a5_4_i4_pI_to_0(
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 16), i64 [[I:%.*]]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 4, i64 [[I:%.*]]
; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]])
; CHECK-NEXT: ret i64 [[LEN]]
;
>From fe8abc6c745ab61d14d08ff22df1d16d9aa1fbfc Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 16 Jan 2026 13:01:04 -0600
Subject: [PATCH 2/2] update clang test
---
clang/test/CodeGen/union-tbaa1.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c
index 3f6ada5023f27..c512c011e588f 100644
--- a/clang/test/CodeGen/union-tbaa1.c
+++ b/clang/test/CodeGen/union-tbaa1.c
@@ -11,13 +11,13 @@ void bar(vect32 p[][2]);
// CHECK-LABEL: define dso_local void @fred(
// CHECK-SAME: i32 noundef [[NUM:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[VEC:%.*]], ptr noundef readonly captures(none) [[INDEX:%.*]], ptr noundef readonly captures(none) [[ARR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP:%.*]] = alloca [4 x [2 x %union.vect32]], align 8
+// CHECK-NEXT: [[TMP:%.*]] = alloca [4 x [2 x [[UNION_VECT32:%.*]]]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]]
-// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]]
+// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x [2 x [[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP0]]
// CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA6:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i32 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]]
@@ -27,7 +27,7 @@ void bar(vect32 p[][2]);
// CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16
// CHECK-NEXT: store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]]
+// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [4 x [2 x [[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP4]]
// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX13]], i32 6
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]]
// CHECK-NEXT: [[CONV16:%.*]] = zext i16 [[TMP5]] to i32
More information about the cfe-commits
mailing list