[llvm] aa250ce - Add optimizations for icmp eq/ne (mul(X, Y), 0)

Fri Jan 27 15:50:37 PST 2023

Author: Noah Goldstein
Date: 2023-01-27T17:45:35-06:00
New Revision: aa250ceb3ff15d55bb506b4bc8196f143133d8b5

URL: https://github.com/llvm/llvm-project/commit/aa250ceb3ff15d55bb506b4bc8196f143133d8b5
DIFF: https://github.com/llvm/llvm-project/commit/aa250ceb3ff15d55bb506b4bc8196f143133d8b5.diff

LOG: Add optimizations for icmp eq/ne (mul(X, Y), 0)

1. Add checks if X and/or Y are odd. The Odd values are unnecessary to
   the icmp: isZero(Odd * N) == isZero(N)

2. If neither X nor Y is known odd, then if X * Y cannot overflow AND
   if X and/or Y is non-zero, the non-zero values are unnecessary to the
   icmp.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D140850

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/icmp-binop.ll
    llvm/test/Transforms/InstCombine/pr38677.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8e90014d24d8..41f1fe9a07c0 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1295,6 +1295,48 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
       return new ICmpInst(Pred, X, Cmp.getOperand(1));
   }
 
+  // (icmp eq/ne (mul X Y)) -> (icmp eq/ne X/Y) if we know about whether X/Y are
+  // odd/non-zero/there is no overflow.
+  if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) &&
+      ICmpInst::isEquality(Pred)) {
+
+    KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+    // if X % 2 != 0
+    //    (icmp eq/ne Y)
+    if (XKnown.countMaxTrailingZeros() == 0)
+      return new ICmpInst(Pred, Y, Cmp.getOperand(1));
+
+    KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+    // if Y % 2 != 0
+    //    (icmp eq/ne X)
+    if (YKnown.countMaxTrailingZeros() == 0)
+      return new ICmpInst(Pred, X, Cmp.getOperand(1));
+
+    auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0));
+    if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) {
+      const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+      // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()`
+      // but to avoid unnecessary work, first just if this is an obvious case.
+
+      // if X non-zero and NoOverflow(X * Y)
+      //    (icmp eq/ne Y)
+      if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
+        return new ICmpInst(Pred, Y, Cmp.getOperand(1));
+
+      // if Y non-zero and NoOverflow(X * Y)
+      //    (icmp eq/ne X)
+      if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT))
+        return new ICmpInst(Pred, X, Cmp.getOperand(1));
+    }
+    // Note, we are skipping cases:
+    //      if Y % 2 != 0 AND X % 2 != 0
+    //          (false/true)
+    //      if X non-zero and Y non-zero and NoOverflow(X * Y)
+    //          (false/true)
+    // Those can be simplified later as we would have already replaced the (icmp
+    // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
+    // will fold to a constant elsewhere.
+  }
   return nullptr;
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/icmp-binop.ll b/llvm/test/Transforms/InstCombine/icmp-binop.ll
index 1f4a0de2b812..60a12411ee91 100644
--- a/llvm/test/Transforms/InstCombine/icmp-binop.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-binop.ll
@@ -6,8 +6,7 @@ declare void @llvm.assume(i1)
 
 define i1 @mul_unkV_oddC_eq(i32 %v) {
 ; CHECK-LABEL: @mul_unkV_oddC_eq(
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[V:%.*]], 3
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %mul = mul i32 %v, 3
@@ -28,8 +27,7 @@ define i1 @mul_unkV_oddC_eq_nonzero(i32 %v) {
 
 define <2 x i1> @mul_unkV_oddC_ne_vec(<2 x i64> %v) {
 ; CHECK-LABEL: @mul_unkV_oddC_ne_vec(
-; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i64> [[V:%.*]], <i64 3, i64 3>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i64> [[MUL]], zeroinitializer
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i64> [[V:%.*]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %mul = mul <2 x i64> %v, <i64 3, i64 3>
@@ -72,7 +70,7 @@ define i1 @mul_unkV_oddC_sge(i8 %v) {
 define i1 @mul_reused_unkV_oddC_ne(i64 %v) {
 ; CHECK-LABEL: @mul_reused_unkV_oddC_ne(
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[V:%.*]], 3
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[V]], 0
 ; CHECK-NEXT:    call void @use64(i64 [[MUL]])
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -87,8 +85,7 @@ define i1 @mul_assumeoddV_unkV_eq(i16 %v, i16 %v2) {
 ; CHECK-NEXT:    [[LB:%.*]] = and i16 [[V2:%.*]], 1
 ; CHECK-NEXT:    [[ODD:%.*]] = icmp ne i16 [[LB]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[ODD]])
-; CHECK-NEXT:    [[MUL:%.*]] = mul i16 [[V:%.*]], [[V2]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[V:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %lb = and i16 %v2, 1
@@ -105,7 +102,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) {
 ; CHECK-NEXT:    [[ODD:%.*]] = icmp ne i64 [[LB]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[ODD]])
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[V]], [[V2:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[V2]], 0
 ; CHECK-NEXT:    call void @use64(i64 [[MUL]])
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -120,9 +117,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) {
 
 define <2 x i1> @mul_setoddV_unkV_ne(<2 x i32> %v1, <2 x i32> %v2) {
 ; CHECK-LABEL: @mul_setoddV_unkV_ne(
-; CHECK-NEXT:    [[V:%.*]] = or <2 x i32> [[V1:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[V]], [[V2:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[MUL]], zeroinitializer
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V2:%.*]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %v = or <2 x i32> %v1, <i32 1, i32 1>
@@ -190,8 +185,7 @@ define i1 @mul_assumenzV_unkV_nsw_ne(i32 %v, i32 %v2) {
 ; CHECK-LABEL: @mul_assumenzV_unkV_nsw_ne(
 ; CHECK-NEXT:    [[NZ:%.*]] = icmp ne i32 [[V:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NZ]])
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[V]], [[V2:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[V2:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %nz = icmp ne i32 %v, 0
@@ -229,9 +223,7 @@ define <2 x i1> @mul_unkV_unkV_nsw_nuw_ne(<2 x i16> %v, <2 x i16> %v2) {
 
 define i1 @mul_setnzV_unkV_nuw_eq(i8 %v1, i8 %v2) {
 ; CHECK-LABEL: @mul_setnzV_unkV_nuw_eq(
-; CHECK-NEXT:    [[V:%.*]] = or i8 [[V1:%.*]], 2
-; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i8 [[V]], [[V2:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[V2:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %v = or i8 %v1, 2
@@ -245,8 +237,7 @@ define i1 @mul_brnzV_unkV_nuw_eq(i64 %v, i64 %v2) {
 ; CHECK-NEXT:    [[NZ_NOT:%.*]] = icmp eq i64 [[V2:%.*]], 0
 ; CHECK-NEXT:    br i1 [[NZ_NOT]], label [[FALSE:%.*]], label [[TRUE:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[V:%.*]], [[V2]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[MUL]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[V:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ; CHECK:       false:
 ; CHECK-NEXT:    call void @use64(i64 [[V]])

diff  --git a/llvm/test/Transforms/InstCombine/pr38677.ll b/llvm/test/Transforms/InstCombine/pr38677.ll
index 63c0caba1013..bacee2c67676 100644
--- a/llvm/test/Transforms/InstCombine/pr38677.ll
+++ b/llvm/test/Transforms/InstCombine/pr38677.ll
@@ -12,9 +12,7 @@ define i32 @foo(i1 %which, ptr %dst) {
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
 ; CHECK-NEXT:    [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (ptr @A, ptr @B), i32 2, i32 1), [[DELAY]] ]
-; CHECK-NEXT:    [[B7:%.*]] = mul i32 [[USE2]], 2147483647
-; CHECK-NEXT:    [[C3:%.*]] = icmp eq i32 [[B7]], 0
-; CHECK-NEXT:    store i1 [[C3]], ptr [[DST:%.*]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[DST:%.*]], align 1
 ; CHECK-NEXT:    ret i32 [[USE2]]
 ;
 entry: