[llvm] [InstCombine] Avoid foldCmpLoadFromIndexedGlobal for multi-use load (PR #151524)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 06:58:47 PDT 2025
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/151524
Do not perform the foldCmpLoadFromIndexedGlobal() transform if it would require creating more than one instruction while the load cannot be removed due to multi-use.
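For context, a minimal sketch of the kind of case the new one-use checks are aimed at, adapted from the test1_noinbounds_multiuse test added below (the %idx/%R2 names are illustrative; the constants match the existing test1/test1_noinbounds CHECK lines):

  ; Input: the GEP is not inbounds and the load has a second use,
  ; so the load cannot be removed.
  %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X
  %Q = load i16, ptr %P
  call void @use.i16(i16 %Q)
  %R = icmp eq i16 %Q, 0

  ; Without the check, the fold would rewrite the compare in terms of
  ; the index, emitting a mask on top of the load that has to stay:
  %idx = and i32 %X, 2147483647
  %R2 = icmp eq i32 %idx, 9
  ; Net effect: one more instruction than before, so the transform now bails out.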
From 9fbbebc87f8c8b0d25f40451fcd8ce16d7e0a832 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 31 Jul 2025 15:55:31 +0200
Subject: [PATCH] [InstCombine] Avoid foldCmpLoadFromIndexedGlobal for
multi-use load
Do not perform the foldCmpLoadFromIndexedGlobal() transform if
we have to create more than one instruction but the load cannot
be removed due to multi-use.
---
.../InstCombine/InstCombineCompares.cpp | 36 +++-
llvm/test/Transforms/InstCombine/load-cmp.ll | 171 ++++++++++++++++++
2 files changed, 199 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b268fea85ab07..181dbeb754882 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -305,8 +305,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
unsigned ElementSize =
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+ bool NeedsMask = !GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0;
auto MaskIdx = [&](Value *Idx) {
- if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+ if (NeedsMask) {
Value *Mask = Constant::getAllOnesValue(Idx->getType());
Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
Idx = Builder.CreateAnd(Idx, Mask);
@@ -314,6 +315,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
return Idx;
};
+ if (!LI->hasOneUse() && NeedsMask)
+ return nullptr;
+
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
@@ -328,6 +332,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (SecondTrueElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+ if (!LI->hasOneUse())
+ return nullptr;
+
// True for two elements -> 'i == 47 | i == 72'.
Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
@@ -349,6 +356,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (SecondFalseElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+ if (!LI->hasOneUse())
+ return nullptr;
+
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
Value *SecondFalseIdx =
@@ -365,6 +375,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
+ if (!LI->hasOneUse())
+ return nullptr;
+
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
Idx = Builder.CreateAdd(Idx, Offs);
}
@@ -380,6 +393,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = MaskIdx(Idx);
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
+ if (!LI->hasOneUse())
+ return nullptr;
+
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
Idx = Builder.CreateAdd(Idx, Offs);
}
@@ -389,6 +405,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
+ if (!LI->hasOneUse())
+ return nullptr;
+
// If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
@@ -1952,13 +1971,14 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// Try to optimize things like "A[i] & 42 == 0" to index computations.
Value *X = And->getOperand(0);
Value *Y = And->getOperand(1);
- if (auto *C2 = dyn_cast<ConstantInt>(Y))
- if (auto *LI = dyn_cast<LoadInst>(X))
- if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
- if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (Instruction *Res =
- foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
- return Res;
+ if (And->hasOneUse())
+ if (auto *C2 = dyn_cast<ConstantInt>(Y))
+ if (auto *LI = dyn_cast<LoadInst>(X))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (Instruction *Res =
+ foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
+ return Res;
if (!Cmp.isEquality())
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index ccaf31f3084d6..dab458fa59152 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -22,6 +22,9 @@
%Foo { i32 6, i32 5, i32 9, i32 20 },
%Foo { i32 12, i32 3, i32 9, i32 8 } ]
+declare void @use.i16(i16)
+declare void @use.i32(i32)
+declare void @use.f64(double)
define i1 @test1(i32 %X) {
; CHECK-LABEL: @test1(
@@ -34,6 +37,21 @@ define i1 @test1(i32 %X) {
ret i1 %R
}
+define i1 @test1_multiuse(i32 %X) {
+; CHECK-LABEL: @test1_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+}
+
define i1 @test1_noinbounds(i32 %X) {
; CHECK-LABEL: @test1_noinbounds(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
@@ -46,6 +64,21 @@ define i1 @test1_noinbounds(i32 %X) {
ret i1 %R
}
+define i1 @test1_noinbounds_multiuse(i32 %X) {
+; CHECK-LABEL: @test1_noinbounds_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[Q]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+}
+
define i1 @test1_noinbounds_i64(i64 %X) {
; CHECK-LABEL: @test1_noinbounds_i64(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
@@ -95,6 +128,21 @@ define i1 @test2(i32 %X) {
ret i1 %R
}
+define i1 @test2_multiuse(i32 %X) {
+; CHECK-LABEL: @test2_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X]], 4
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = icmp slt i16 %Q, 85
+ ret i1 %R
+}
+
define i1 @test3(i32 %X) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 1
@@ -104,7 +152,21 @@ define i1 @test3(i32 %X) {
%Q = load double, ptr %P
%R = fcmp oeq double %Q, 1.0
ret i1 %R
+}
+define i1 @test3_multiuse(i32 %X) {
+; CHECK-LABEL: @test3_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT: call void @use.f64(double [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+ %Q = load double, ptr %P
+ call void @use.f64(double %Q)
+ %R = fcmp oeq double %Q, 1.0
+ ret i1 %R
}
define i1 @test4(i32 %X) {
@@ -120,6 +182,21 @@ define i1 @test4(i32 %X) {
ret i1 %R
}
+define i1 @test4_multiuse(i32 %X) {
+; CHECK-LABEL: @test4_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp slt i16 [[Q]], 74
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = icmp sle i16 %Q, 73
+ ret i1 %R
+}
+
define i1 @test4_i16(i16 %X) {
; CHECK-LABEL: @test4_i16(
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[X:%.*]] to i32
@@ -147,6 +224,21 @@ define i1 @test5(i32 %X) {
ret i1 %R
}
+define i1 @test5_multiuse(i32 %X) {
+; CHECK-LABEL: @test5_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[Q]], 69
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = icmp eq i16 %Q, 69
+ ret i1 %R
+}
+
define i1 @test6(i32 %X) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1
@@ -159,6 +251,21 @@ define i1 @test6(i32 %X) {
ret i1 %R
}
+define i1 @test6_multiuse(i32 %X) {
+; CHECK-LABEL: @test6_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT: call void @use.f64(double [[Q]])
+; CHECK-NEXT: [[R:%.*]] = fcmp ogt double [[Q]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+ %Q = load double, ptr %P
+ call void @use.f64(double %Q)
+ %R = fcmp ogt double %Q, 0.0
+ ret i1 %R
+}
+
define i1 @test7(i32 %X) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -4
@@ -171,6 +278,21 @@ define i1 @test7(i32 %X) {
ret i1 %R
}
+define i1 @test7_multiuse(i32 %X) {
+; CHECK-LABEL: @test7_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT: call void @use.f64(double [[Q]])
+; CHECK-NEXT: [[R:%.*]] = fcmp olt double [[Q]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+ %Q = load double, ptr %P
+ call void @use.f64(double %Q)
+ %R = fcmp olt double %Q, 0.0
+ ret i1 %R
+}
+
define i1 @test8(i32 %X) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
@@ -184,6 +306,40 @@ define i1 @test8(i32 %X) {
ret i1 %S
}
+define i1 @test8_multiuse1(i32 %X) {
+; CHECK-LABEL: @test8_multiuse1(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: call void @use.i16(i16 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = and i16 [[Q]], 3
+; CHECK-NEXT: [[S:%.*]] = icmp eq i16 [[R]], 0
+; CHECK-NEXT: ret i1 [[S]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ call void @use.i16(i16 %Q)
+ %R = and i16 %Q, 3
+ %S = icmp eq i16 %R, 0
+ ret i1 %S
+}
+
+define i1 @test8_multiuse2(i32 %X) {
+; CHECK-LABEL: @test8_multiuse2(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT: [[R:%.*]] = and i16 [[Q]], 3
+; CHECK-NEXT: call void @use.i16(i16 [[R]])
+; CHECK-NEXT: [[S:%.*]] = icmp eq i16 [[R]], 0
+; CHECK-NEXT: ret i1 [[S]]
+;
+ %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+ %Q = load i16, ptr %P
+ %R = and i16 %Q, 3
+ call void @use.i16(i16 %R)
+ %S = icmp eq i16 %R, 0
+ ret i1 %S
+}
+
@GA = internal constant [4 x { i32, i32 } ] [
{ i32, i32 } { i32 1, i32 0 },
{ i32, i32 } { i32 2, i32 1 },
@@ -203,6 +359,21 @@ define i1 @test9(i32 %X) {
ret i1 %R
}
+define i1 @test9_multiuse(i32 %X) {
+; CHECK-LABEL: @test9_multiuse(
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4 x { i32, i32 }], ptr @GA, i32 0, i32 [[X:%.*]], i32 1
+; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: call void @use.i32(i32 [[Q]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [4 x { i32, i32 } ], ptr @GA, i32 0, i32 %X, i32 1
+ %Q = load i32, ptr %P
+ call void @use.i32(i32 %Q)
+ %R = icmp eq i32 %Q, 1
+ ret i1 %R
+}
+
define i1 @test10_struct(i32 %x) {
; CHECK-LABEL: @test10_struct(
; CHECK-NEXT: ret i1 false