[llvm] [InstCombine] Lower multi-dimensional GEP to ptradd (PR #150383)
Usha Gupta via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 08:54:44 PDT 2025
https://github.com/usha1830 updated https://github.com/llvm/llvm-project/pull/150383
>From 269f0ddde13fb6b95bcf530cf071ceaca109f037 Mon Sep 17 00:00:00 2001
From: Usha Gupta <usha.gupta at arm.com>
Date: Thu, 24 Jul 2025 07:26:14 +0000
Subject: [PATCH 1/3] [InstCombine] Lower multi-dimensional GEP to ptradd
---
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ee3bb1abe86e..a62a462f26918 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3017,6 +3017,15 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
m_Shl(m_Value(), m_ConstantInt())))))
return true;
+ // Flatten multidimensional GEPs with one variable index.
+ unsigned NumVarIndices = 0;
+ for (unsigned i = 1; i < GEP.getNumOperands(); ++i) {
+ if (!isa<ConstantInt>(GEP.getOperand(i)))
+ ++NumVarIndices;
+ }
+ if (NumVarIndices == 1)
+ return true;
+
// gep (gep %p, C1), %x, C2 is expanded so the two constants can
// possibly be merged together.
auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
>From 4c121404369c094a45543977e96c9e29469a0c6a Mon Sep 17 00:00:00 2001
From: Usha Gupta <usha.gupta at arm.com>
Date: Fri, 25 Jul 2025 19:05:05 +0000
Subject: [PATCH 2/3] Add more constraints for handling multi-dimensional geps
for global arrays
---
.../InstCombine/InstructionCombining.cpp | 45 +++++++++++---
.../InstCombine/canonicalize-gep-constglob.ll | 61 +++++++++++++++++++
2 files changed, 99 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a62a462f26918..9909a6901b63f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,6 +2997,43 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
return nullptr;
}
+/// Return true if we should lower multi-dimensional geps
+static bool ismultiDimGep(GetElementPtrInst &GEP) {
+ // Limit handling to only 3D and 4D arrays with integer types.
+ // getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+ unsigned NumOps = GEP.getNumOperands();
+
+ // First index must be constant zero (array base)
+ if (!isa<ConstantInt>(GEP.getOperand(1)) ||
+ !cast<ConstantInt>(GEP.getOperand(1))->isZero())
+ return false;
+
+ // Limit lowering for arrays with 3 or more dimensions
+ if (NumOps < 5)
+ return false;
+
+ // Check that it's arrays all the way
+ Type *CurTy = GEP.getSourceElementType();
+ unsigned NumVar = 0;
+ for (unsigned I = 2; I < NumOps; ++I) {
+ auto *ArrTy = dyn_cast<ArrayType>(CurTy);
+ if (!ArrTy)
+ return false;
+ if (!isa<ConstantInt>(GEP.getOperand(I)))
+ ++NumVar;
+ CurTy = ArrTy->getElementType();
+ }
+
+ // Limit lowering only for one variable index
+ if (NumVar != 1)
+ return false;
+
+ if (!CurTy->isIntegerTy() || CurTy->getIntegerBitWidth() > 128)
+ return false;
+
+ return true;
+}
+
/// Return true if we should canonicalize the gep to an i8 ptradd.
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
@@ -3017,13 +3054,7 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
m_Shl(m_Value(), m_ConstantInt())))))
return true;
- // Flatten multidimensional GEPs with one variable index.
- unsigned NumVarIndices = 0;
- for (unsigned i = 1; i < GEP.getNumOperands(); ++i) {
- if (!isa<ConstantInt>(GEP.getOperand(i)))
- ++NumVarIndices;
- }
- if (NumVarIndices == 1)
+ if (ismultiDimGep(GEP))
return true;
// gep (gep %p, C1), %x, C2 is expanded so the two constants can
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 07c8a8c6b90e1..1a1b3d1a746ed 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -2,6 +2,9 @@
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
@glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+ at glob_i8 = internal global [10 x [10 x [10 x i8]]] zeroinitializer
+ at glob_i16 = internal global [10 x [10 x [10 x i16]]] zeroinitializer
+ at glob_i64 = internal global [10 x [10 x [10 x i64]]] zeroinitializer
define ptr @x12(i64 %x) {
; CHECK-LABEL: define ptr @x12(
@@ -78,3 +81,61 @@ entry:
%c = add i32 %a, %b
ret i32 %c
}
+
+define i8* @flat_gep8(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep8(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 100
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i8, i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 35
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr [10 x [10 x [10 x i8]]], ptr @glob_i8, i64 0, i64 %x, i64 3, i64 5
+ ret ptr %gep
+}
+
+define i16* @flat_gep16(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep16(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 200
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i16, i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 46
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr [10 x [10 x [10 x i16]]], ptr @glob_i16, i64 0, i64 %x, i64 2, i64 3
+ ret ptr %gep
+}
+
+define i32* @flat_gep(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 400
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob, i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 100
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
+ ret ptr %gep
+}
+
+define i64* @flat_gep64(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep64(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 800
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i64, i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr [10 x [10 x [10 x i64]]], ptr @glob_i64, i64 0, i64 %x, i64 3, i64 6
+ ret ptr %gep
+}
+
+
>From 82e440d2125223817ad8b051b750e0473458fda4 Mon Sep 17 00:00:00 2001
From: Usha Gupta <usha.gupta at arm.com>
Date: Fri, 1 Aug 2025 15:41:24 +0000
Subject: [PATCH 3/3] Handle nested GEP with one variable index in the outer
gep
---
.../InstCombine/InstructionCombining.cpp | 57 +++++++++++++++----
1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9909a6901b63f..b60b7c7c352af 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,29 +2997,64 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
return nullptr;
}
-/// Return true if we should lower multi-dimensional geps
-static bool ismultiDimGep(GetElementPtrInst &GEP) {
- // Limit handling to only 3D and 4D arrays with integer types.
- // getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
- unsigned NumOps = GEP.getNumOperands();
+/// Accumulate constant indices from GEPs with all-constant indices, then
+/// check if the outermost GEP (with one variable index) is flattenable.
+/// Matches and returns true for multi-dimensional array geps with only one
+/// variable index. The pointer could also be another gep with all constant
+/// indices. For ex:
+/// -getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+/// -getelementptr [9 x [9 x [9 x i32]]],
+/// <another gep>, i64 0, i64 %i, i64 2, i64 3
+static bool ismultiDimGepFlattenable(const GetElementPtrInst &GEP) {
+ // Collect all indices, outermost last
+ SmallVector<const GEPOperator *, 4> GEPChain;
+ const Value *Base = &GEP;
+
+ // Go over GEPs with all constant indices
+ while (auto *CurGep = dyn_cast<GEPOperator>(Base)) {
+ bool AllConst = true;
+ for (unsigned I = 1; I < CurGep->getNumOperands(); ++I)
+ if (!isa<ConstantInt>(CurGep->getOperand(I)))
+ AllConst = false;
+ if (!AllConst)
+ break;
+ GEPChain.push_back(CurGep);
+ Base = CurGep->getOperand(0)->stripPointerCasts();
+ }
+
+ // Accumulate all indices from innermost to outermost
+ SmallVector<Value *, 8> Indices;
+ for (int I = GEPChain.size() - 1; I >= 0; --I) {
+ const GEPOperator *GO = GEPChain[I];
+ for (unsigned J = 1; J < GO->getNumOperands(); ++J)
+ Indices.push_back(GO->getOperand(J));
+ }
+
+ // Add indices from the main GEP (skip pointer operand)
+ for (unsigned J = 1; J < GEP.getNumOperands(); ++J)
+ Indices.push_back(GEP.getOperand(J));
+
+ if (Indices.empty())
+ return false;
// First index must be constant zero (array base)
- if (!isa<ConstantInt>(GEP.getOperand(1)) ||
- !cast<ConstantInt>(GEP.getOperand(1))->isZero())
+ if (!isa<ConstantInt>(Indices[0]) || !cast<ConstantInt>(Indices[0])->isZero())
return false;
+ unsigned NumDims = Indices.size() - 1;
+
// Limit lowering for arrays with 3 or more dimensions
- if (NumOps < 5)
+ if (NumDims < 3)
return false;
// Check that it's arrays all the way
Type *CurTy = GEP.getSourceElementType();
unsigned NumVar = 0;
- for (unsigned I = 2; I < NumOps; ++I) {
+ for (unsigned I = 1; I < Indices.size(); ++I) {
auto *ArrTy = dyn_cast<ArrayType>(CurTy);
if (!ArrTy)
return false;
- if (!isa<ConstantInt>(GEP.getOperand(I)))
+ if (!isa<ConstantInt>(Indices[I]))
++NumVar;
CurTy = ArrTy->getElementType();
}
@@ -3054,7 +3089,7 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
m_Shl(m_Value(), m_ConstantInt())))))
return true;
- if (ismultiDimGep(GEP))
+ if (ismultiDimGepFlattenable(GEP))
return true;
// gep (gep %p, C1), %x, C2 is expanded so the two constants can
More information about the llvm-commits
mailing list