[llvm] InstCombine: Avoid counting uses of constants (PR #136566)

Mon Apr 21 06:21:31 PDT 2025

https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/136566

Logically it does not matter; getFreelyInvertedImpl doesn't
depend on the value for the m_ImmConstant case.

This use count logic should probably sink into getFreelyInvertedImpl,
every use of this appears to just be a hasOneUse or hasNUse count,
so this could change to just be a use count threshold.

>From 76f7be4bbb2ce830185171e5dbf3ad91222d4931 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 20 Apr 2025 22:32:55 +0200
Subject: [PATCH] InstCombine: Avoid counting uses of constants

Logically it does not matter; getFreelyInvertedImpl doesn't
depend on the value for the m_ImmConstant case.

This use count logic should probably sink into getFreelyInvertedImpl,
every use of this appears to just be a hasOneUse or hasNUse count,
so this could change to just be a use count threshold.
---
 .../InstCombine/InstCombineCompares.cpp       |  4 +-
 llvm/test/Transforms/InstCombine/icmp.ll      | 93 +++++++++++++++++++
 2 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 55afe1258159a..b7b0bb7361359 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5034,8 +5034,8 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
 
   if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
     // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
-    if (Value *NotOp1 =
-            IC.getFreelyInverted(Op1, !Op1->hasNUsesOrMore(3), &IC.Builder))
+    if (Value *NotOp1 = IC.getFreelyInverted(
+            Op1, !isa<Constant>(Op1) && !Op1->hasNUsesOrMore(3), &IC.Builder))
       return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
                           Constant::getNullValue(Op1->getType()));
     // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X  is freely invertible.
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 6e1486660b24d..f5df8573d6304 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -2954,6 +2954,99 @@ define i1 @or1_eq1(i32 %x) {
   ret i1 %t1
 }
 
+define <2 x i1> @or1_eq1_vec(<2 x i32> %x) {
+; CHECK-LABEL: @or1_eq1_vec(
+; CHECK-NEXT:    [[T1:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 2)
+; CHECK-NEXT:    ret <2 x i1> [[T1]]
+;
+  %t0 = or <2 x i32> %x, splat (i32 1)
+  %t1 = icmp eq <2 x i32> %t0, splat (i32 1)
+  ret <2 x i1> %t1
+}
+
+define <2 x i1> @or_eq_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @or_eq_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[T1:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[T1]]
+;
+  %t0 = or <2 x i32> %x, <i32 1, i32 2>
+  %t1 = icmp eq <2 x i32> %t0, <i32 1, i32 2>
+  ret <2 x i1> %t1
+}
+
+define void @or_eq_vec_multiple_nonsplat(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %ptr0, ptr %ptr1, ptr %ptr2) {
+; CHECK-LABEL: @or_eq_vec_multiple_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    store <2 x i1> [[CMP0]], ptr [[PTR0:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    store <2 x i1> [[CMP1]], ptr [[PTR1:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[Z:%.*]], <i32 -2, i32 -3>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    store <2 x i1> [[CMP2]], ptr [[PTR2:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t0 = or <2 x i32> %x, <i32 1, i32 2>
+  %cmp0 = icmp eq <2 x i32> %t0, <i32 1, i32 2>
+  store <2 x i1> %cmp0, ptr %ptr0
+
+  %t1 = or <2 x i32> %y, <i32 1, i32 2>
+  %cmp1 = icmp eq <2 x i32> %t1, <i32 1, i32 2>
+  store <2 x i1> %cmp1, ptr %ptr1
+
+  %t2 = or <2 x i32> %z, <i32 1, i32 2>
+  %cmp2 = icmp eq <2 x i32> %t2, <i32 1, i32 2>
+  store <2 x i1> %cmp2, ptr %ptr2
+  ret void
+}
+
+; Make sure use count of 1 doesn't matter
+define i1 @or1_eq1_multiple(i32 %x, i32 %y, i32 %z, ptr %ptr0, ptr %ptr1) {
+; CHECK-LABEL: @or1_eq1_multiple(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT:    store i1 [[CMP1]], ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], 2
+; CHECK-NEXT:    store i1 [[CMP2]], ptr [[PTR1:%.*]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[Z:%.*]], 2
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+  %t0 = or i32 %x, 1
+  %cmp0 = icmp eq i32 %t0, 1
+  store i1 %cmp0, ptr %ptr0
+
+  %t1 = or i32 %y, 1
+  %cmp1 = icmp eq i32 %t1, 1
+  store i1 %cmp1, ptr %ptr1
+
+  %t2 = or i32 %z, 1
+  %cmp2 = icmp eq i32 %t2, 1
+  ret i1 %cmp2
+}
+
+define <2 x i1> @or1_eq1_multiple_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %ptr0, ptr %ptr1) {
+; CHECK-LABEL: @or1_eq1_multiple_vec(
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 2)
+; CHECK-NEXT:    store <2 x i1> [[CMP0]], ptr [[PTR0:%.*]], align 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[Y:%.*]], splat (i32 2)
+; CHECK-NEXT:    store <2 x i1> [[CMP1]], ptr [[PTR1:%.*]], align 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult <2 x i32> [[Z:%.*]], splat (i32 2)
+; CHECK-NEXT:    ret <2 x i1> [[CMP2]]
+;
+  %t0 = or <2 x i32> %x, splat (i32 1)
+  %cmp0 = icmp eq <2 x i32> %t0, splat (i32 1)
+  store <2 x i1> %cmp0, ptr %ptr0
+
+  %t1 = or <2 x i32> %y, splat (i32 1)
+  %cmp1 = icmp eq <2 x i32> %t1, splat (i32 1)
+  store <2 x i1> %cmp1, ptr %ptr1
+
+  %t2 = or <2 x i32> %z, splat (i32 1)
+  %cmp2 = icmp eq <2 x i32> %t2, splat (i32 1)
+  ret <2 x i1> %cmp2
+}
+
 ; X | C == C --> X <=u C (when C+1 is PowerOf2).
 
 define <2 x i1> @or3_eq3_vec(<2 x i8> %x) {