[llvm] [InstCombine] Add limit for expansion of gep chains (PR #147065)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 07:28:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
When converting gep subtraction / comparison to offset subtraction / comparison, avoid expanding very long multi-use gep chains.
Another improvement we could make is to expand a one-use GEP followed by a multi-use GEP differently, by rewriting the multi-use GEP to include the one-use offsets. But I think we want to have some kind of complexity cut-off in any case.
---
Full diff: https://github.com/llvm/llvm-project/pull/147065.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+23-1)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+1-1)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+3)
- (modified) llvm/test/Transforms/InstCombine/icmp-gep.ll (+88)
- (modified) llvm/test/Transforms/InstCombine/sub-gep.ll (+52)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f727eb0a63e05..5dae81a5bea9c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2144,13 +2144,35 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
return Base;
}
+bool CommonPointerBase::isExpensive() const {
+ bool SeenConst = false;
+ unsigned NumGEPs = 0;
+ auto ProcessGEPs = [&SeenConst, &NumGEPs](ArrayRef<GEPOperator *> GEPs) {
+ bool SeenMultiUse = false;
+ for (GEPOperator *GEP : GEPs) {
+ // Only count GEPs after the first multi-use GEP. For the first one,
+ // we will directly reuse the offset.
+ if (SeenMultiUse) {
+ bool IsConst = GEP->hasAllConstantIndices();
+ SeenConst |= IsConst;
+ NumGEPs += !IsConst;
+ }
+ SeenMultiUse |= !GEP->hasOneUse();
+ }
+ };
+ ProcessGEPs(LHSGEPs);
+ ProcessGEPs(RHSGEPs);
+ NumGEPs += SeenConst;
+ return NumGEPs > 2;
+}
+
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty, bool IsNUW) {
CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
- if (!Base.Ptr)
+ if (!Base.Ptr || Base.isExpensive())
return nullptr;
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6737b50405ee2..cbe4e3718a75c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -709,7 +709,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
};
CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
- if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
+ if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
Value *Offset =
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9adde8094d44d..4ffc11db6856f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -896,6 +896,9 @@ struct CommonPointerBase {
GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
static CommonPointerBase compute(Value *LHS, Value *RHS);
+
+ /// Whether expanding the GEP chains is expensive.
+ bool isExpensive() const;
};
} // end namespace llvm
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 3f104056fb1f2..6c7572089532a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -849,3 +849,91 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
%cmp = icmp ugt ptr %gep2, %base
ret i1 %cmp
}
+
+define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT: call void @use(ptr [[GEP3]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ call void @use(ptr %gep3)
+ %cmp = icmp eq ptr %gep3, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
+; CHECK-NEXT: call void @use(ptr [[GEP3]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+ call void @use(ptr %gep3)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_consts(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
+; CHECK-NEXT: [[GEP4_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
+; CHECK-NEXT: call void @use(ptr [[GEP4]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP2_IDX]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP4_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 4
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx1
+ %gep3 = getelementptr i32, ptr %gep2, i64 4
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx2
+ call void @use(ptr %gep4)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[GEP2]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: call void @use(ptr [[GEP4]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+ call void @use(ptr %gep4)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 993a06ad1780f..c53973c482f39 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -1089,3 +1089,55 @@ define <2 x i64> @splat_geps_multiple(ptr %base, i64 %idx0, <2 x i64> %idx1, <2
%d = sub <2 x i64> %gep2.int, %gep1.int
ret <2 x i64> %d
}
+
+define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P2]])
+; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P4]])
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IDX1]], [[IDX2]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
+; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[GEPDIFF]]
+;
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+ %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+ call void @use(ptr %p2)
+ %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+ %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+ call void @use(ptr %p4)
+ %i1 = ptrtoint ptr %p4 to i64
+ %i2 = ptrtoint ptr %p2 to i64
+ %d = sub i64 %i2, %i1
+ ret i64 %d
+}
+
+define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
+; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P2]])
+; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P4]], i64 [[IDX5:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P5]])
+; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[P5]] to i64
+; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT: [[D:%.*]] = sub i64 [[I2]], [[I1]]
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+ %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+ call void @use(ptr %p2)
+ %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+ %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+ %p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
+ call void @use(ptr %p5)
+ %i1 = ptrtoint ptr %p5 to i64
+ %i2 = ptrtoint ptr %p2 to i64
+ %d = sub i64 %i2, %i1
+ ret i64 %d
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/147065
More information about the llvm-commits mailing list