[llvm] [InstCombine] Avoid foldCmpLoadFromIndexedGlobal for multi-use load (PR #151524)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 31 06:58:47 PDT 2025


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/151524

Do not perform the foldCmpLoadFromIndexedGlobal() transform if we would have to create more than one instruction while the load cannot be removed because it has other uses.

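As a concrete illustration (taken from the new test1_noinbounds_multiuse test below), this is the kind of pattern the fold now leaves alone: the non-inbounds GEP would force a mask-and-compare expansion of the index, but the extra call keeps the load alive, so emitting those instructions is no longer a net win:

  %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X   ; no inbounds -> index would need masking
  %Q = load i16, ptr %P
  call void @use.i16(i16 %Q)       ; second use keeps the load (and GEP) alive
  %R = icmp eq i16 %Q, 0           ; previously rewritten to an index compare; now left as-is
  ret i1 %R
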
From 9fbbebc87f8c8b0d25f40451fcd8ce16d7e0a832 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 31 Jul 2025 15:55:31 +0200
Subject: [PATCH] [InstCombine] Avoid foldCmpLoadFromIndexedGlobal for
 multi-use load

Do not perform the foldCmpLoadFromIndexedGlobal() transform if
we have to create more than one instruction but the load cannot
be removed due to multi-use.
---
 .../InstCombine/InstCombineCompares.cpp       |  36 +++-
 llvm/test/Transforms/InstCombine/load-cmp.ll  | 171 ++++++++++++++++++
 2 files changed, 199 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b268fea85ab07..181dbeb754882 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -305,8 +305,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
   // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
   unsigned ElementSize =
       DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+  bool NeedsMask = !GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0;
   auto MaskIdx = [&](Value *Idx) {
-    if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+    if (NeedsMask) {
       Value *Mask = Constant::getAllOnesValue(Idx->getType());
       Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
       Idx = Builder.CreateAnd(Idx, Mask);
@@ -314,6 +315,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     return Idx;
   };
 
+  if (!LI->hasOneUse() && NeedsMask)
+    return nullptr;
+
   // If the comparison is only true for one or two elements, emit direct
   // comparisons.
   if (SecondTrueElement != Overdefined) {
@@ -328,6 +332,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     if (SecondTrueElement == Undefined)
       return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
 
+    if (!LI->hasOneUse())
+      return nullptr;
+
     // True for two elements -> 'i == 47 | i == 72'.
     Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
     Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
@@ -349,6 +356,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     if (SecondFalseElement == Undefined)
       return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
 
+    if (!LI->hasOneUse())
+      return nullptr;
+
     // False for two elements -> 'i != 47 & i != 72'.
     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
     Value *SecondFalseIdx =
@@ -365,6 +375,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
 
     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
     if (FirstTrueElement) {
+      if (!LI->hasOneUse())
+        return nullptr;
+
       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
       Idx = Builder.CreateAdd(Idx, Offs);
     }
@@ -380,6 +393,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     Idx = MaskIdx(Idx);
     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
     if (FirstFalseElement) {
+      if (!LI->hasOneUse())
+        return nullptr;
+
       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
       Idx = Builder.CreateAdd(Idx, Offs);
     }
@@ -389,6 +405,9 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
   }
 
+  if (!LI->hasOneUse())
+    return nullptr;
+
   // If a magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
@@ -1952,13 +1971,14 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
   // Try to optimize things like "A[i] & 42 == 0" to index computations.
   Value *X = And->getOperand(0);
   Value *Y = And->getOperand(1);
-  if (auto *C2 = dyn_cast<ConstantInt>(Y))
-    if (auto *LI = dyn_cast<LoadInst>(X))
-      if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
-        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
-          if (Instruction *Res =
-                  foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
-            return Res;
+  if (And->hasOneUse())
+    if (auto *C2 = dyn_cast<ConstantInt>(Y))
+      if (auto *LI = dyn_cast<LoadInst>(X))
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+          if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+            if (Instruction *Res =
+                    foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
+              return Res;
 
   if (!Cmp.isEquality())
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index ccaf31f3084d6..dab458fa59152 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -22,6 +22,9 @@
   %Foo { i32 6, i32 5, i32 9, i32 20 },
   %Foo { i32 12, i32 3, i32 9, i32 8 } ]
 
+declare void @use.i16(i16)
+declare void @use.i32(i32)
+declare void @use.f64(double)
 
 define i1 @test1(i32 %X) {
 ; CHECK-LABEL: @test1(
@@ -34,6 +37,21 @@ define i1 @test1(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test1_multiuse(i32 %X) {
+; CHECK-LABEL: @test1_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X]], 9
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+}
+
 define i1 @test1_noinbounds(i32 %X) {
 ; CHECK-LABEL: @test1_noinbounds(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
@@ -46,6 +64,21 @@ define i1 @test1_noinbounds(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test1_noinbounds_multiuse(i32 %X) {
+; CHECK-LABEL: @test1_noinbounds_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[Q]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+}
+
 define i1 @test1_noinbounds_i64(i64 %X) {
 ; CHECK-LABEL: @test1_noinbounds_i64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
@@ -95,6 +128,21 @@ define i1 @test2(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test2_multiuse(i32 %X) {
+; CHECK-LABEL: @test2_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X]], 4
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = icmp slt i16 %Q, 85
+  ret i1 %R
+}
+
 define i1 @test3(i32 %X) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X:%.*]], 1
@@ -104,7 +152,21 @@ define i1 @test3(i32 %X) {
   %Q = load double, ptr %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
+}
 
+define i1 @test3_multiuse(i32 %X) {
+; CHECK-LABEL: @test3_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT:    call void @use.f64(double [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X]], 1
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+  %Q = load double, ptr %P
+  call void @use.f64(double %Q)
+  %R = fcmp oeq double %Q, 1.0
+  ret i1 %R
 }
 
 define i1 @test4(i32 %X) {
@@ -120,6 +182,21 @@ define i1 @test4(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test4_multiuse(i32 %X) {
+; CHECK-LABEL: @test4_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i16 [[Q]], 74
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = icmp sle i16 %Q, 73
+  ret i1 %R
+}
+
 define i1 @test4_i16(i16 %X) {
 ; CHECK-LABEL: @test4_i16(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[X:%.*]] to i32
@@ -147,6 +224,21 @@ define i1 @test5(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test5_multiuse(i32 %X) {
+; CHECK-LABEL: @test5_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[Q]], 69
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = icmp eq i16 %Q, 69
+  ret i1 %R
+}
+
 define i1 @test6(i32 %X) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
@@ -159,6 +251,21 @@ define i1 @test6(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test6_multiuse(i32 %X) {
+; CHECK-LABEL: @test6_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT:    call void @use.f64(double [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = fcmp ogt double [[Q]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+  %Q = load double, ptr %P
+  call void @use.f64(double %Q)
+  %R = fcmp ogt double %Q, 0.0
+  ret i1 %R
+}
+
 define i1 @test7(i32 %X) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -4
@@ -171,6 +278,21 @@ define i1 @test7(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test7_multiuse(i32 %X) {
+; CHECK-LABEL: @test7_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT:    call void @use.f64(double [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = fcmp olt double [[Q]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X
+  %Q = load double, ptr %P
+  call void @use.f64(double %Q)
+  %R = fcmp olt double %Q, 0.0
+  ret i1 %R
+}
+
 define i1 @test8(i32 %X) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
@@ -184,6 +306,40 @@ define i1 @test8(i32 %X) {
   ret i1 %S
 }
 
+define i1 @test8_multiuse1(i32 %X) {
+; CHECK-LABEL: @test8_multiuse1(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    call void @use.i16(i16 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = and i16 [[Q]], 3
+; CHECK-NEXT:    [[S:%.*]] = icmp eq i16 [[R]], 0
+; CHECK-NEXT:    ret i1 [[S]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  call void @use.i16(i16 %Q)
+  %R = and i16 %Q, 3
+  %S = icmp eq i16 %R, 0
+  ret i1 %S
+}
+
+define i1 @test8_multiuse2(i32 %X) {
+; CHECK-LABEL: @test8_multiuse2(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    [[R:%.*]] = and i16 [[Q]], 3
+; CHECK-NEXT:    call void @use.i16(i16 [[R]])
+; CHECK-NEXT:    [[S:%.*]] = icmp eq i16 [[R]], 0
+; CHECK-NEXT:    ret i1 [[S]]
+;
+  %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X
+  %Q = load i16, ptr %P
+  %R = and i16 %Q, 3
+  call void @use.i16(i16 %R)
+  %S = icmp eq i16 %R, 0
+  ret i1 %S
+}
+
 @GA = internal constant [4 x { i32, i32 } ] [
   { i32, i32 } { i32 1, i32 0 },
   { i32, i32 } { i32 2, i32 1 },
@@ -203,6 +359,21 @@ define i1 @test9(i32 %X) {
   ret i1 %R
 }
 
+define i1 @test9_multiuse(i32 %X) {
+; CHECK-LABEL: @test9_multiuse(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [4 x { i32, i32 }], ptr @GA, i32 0, i32 [[X:%.*]], i32 1
+; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    call void @use.i32(i32 [[Q]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[Q]], 1
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %P = getelementptr inbounds [4 x { i32, i32 } ], ptr @GA, i32 0, i32 %X, i32 1
+  %Q = load i32, ptr %P
+  call void @use.i32(i32 %Q)
+  %R = icmp eq i32 %Q, 1
+  ret i1 %R
+}
+
 define i1 @test10_struct(i32 %x) {
 ; CHECK-LABEL: @test10_struct(
 ; CHECK-NEXT:    ret i1 false

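For context on the last one-use guard added above: when none of the cheaper forms apply, foldCmpLoadFromIndexedGlobal() falls back to the magic-bitvector rewrite mentioned in the comment, encoding the per-element comparison result into a constant and testing bit i via ((magic_cst >> i) & 1) != 0. A rough sketch of that shape, where MAGIC is a placeholder for the constant derived from the global's initializer (the exact types and casts depend on the index width):

  ; before: %Q = load i16 from @G16[%X]; %R = icmp slt i16 %Q, 74
  %shifted = lshr i32 MAGIC, %X        ; bit %X of MAGIC encodes "element %X satisfies the compare"
  %bit     = and i32 %shifted, 1
  %R       = icmp ne i32 %bit, 0

With this patch, that rewrite only fires when the icmp is the load's sole use; otherwise the load and GEP stay behind and the bit test would just be additional work.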

