[llvm] [InstCombine] Improve `(icmp pred (and X, Y), ...)` fold. (PR #66787)

via llvm-commits <llvm-commits at lists.llvm.org>
Sat Oct 21 22:30:48 PDT 2023


https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/66787

>From 62e7e3aa46eac5913d0de7e1fea65d035ca63a1e Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:47 -0500
Subject: [PATCH 01/13] [InstCombine] Make `isFreeToInvert` check recursively.

Some instructions (select/min/max) are inverted by simply inverting their
operands, so whether they are free to invert reduces to whether their
operands are free to invert.
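
For example, a `not` of a select whose arms are themselves inverted values
can be folded by inverting the operands, at no instruction cost
(illustrative IR, not taken from this patch's tests):

  define i8 @invert_select(i1 %c, i8 %x, i8 %y) {
    %nx  = xor i8 %x, -1
    %ny  = xor i8 %y, -1
    %sel = select i1 %c, i8 %nx, i8 %ny
    %not = xor i8 %sel, -1   ; ~(select %c, ~%x, ~%y)
    ret i8 %not              ; simplifies to: select %c, i8 %x, i8 %y
  }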

Differential Revision: https://reviews.llvm.org/D159056
---
 .../Transforms/InstCombine/InstCombiner.h     | 55 +++++++++++--------
 .../InstCombine/minmax-intrinsics.ll          | 13 ++---
 llvm/test/Transforms/InstCombine/pr63791.ll   |  2 +-
 .../LoopVectorize/reduction-inloop.ll         | 25 +++++----
 4 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index f8b3874267ded3b..3a1ac917da6a6cb 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -239,7 +239,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
   /// uses of V and only keep uses of ~V.
   ///
   /// See also: canFreelyInvertAllUsersOf()
-  static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
+  static bool isFreeToInvert(Value *V, bool WillInvertAllUses,
+                             unsigned Depth = 0) {
     // ~(~(X)) -> X.
     if (match(V, m_Not(PatternMatch::m_Value())))
       return true;
@@ -248,32 +249,38 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
     if (match(V, PatternMatch::m_AnyIntegralConstant()))
       return true;
 
+    if (Depth++ >= MaxAnalysisRecursionDepth)
+      return false;
+
+    // The rest of the cases require that we invert all uses so don't bother
+    // doing the analysis if we know we can't use the result.
+    if (!WillInvertAllUses)
+      return false;
+
     // Compares can be inverted if all of their uses are being modified to use
     // the ~V.
     if (isa<CmpInst>(V))
-      return WillInvertAllUses;
-
-    // If `V` is of the form `A + Constant` then `-1 - V` can be folded into
-    // `(-1 - Constant) - A` if we are willing to invert all of the uses.
-    if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant())))
-      return WillInvertAllUses;
-
-    // If `V` is of the form `Constant - A` then `-1 - V` can be folded into
-    // `A + (-1 - Constant)` if we are willing to invert all of the uses.
-    if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value())))
-      return WillInvertAllUses;
-
-    // Selects with invertible operands are freely invertible
-    if (match(V,
-              m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()),
-                       m_Not(PatternMatch::m_Value()))))
-      return WillInvertAllUses;
-
-    // Min/max may be in the form of intrinsics, so handle those identically
-    // to select patterns.
-    if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()),
-                            m_Not(PatternMatch::m_Value()))))
-      return WillInvertAllUses;
+      return true;
+
+    Value *A, *B;
+    // If `V` is of the form `A + B` then `-1 - V` can be folded into
+    // `~B - A` or `~A - B` if we are willing to invert all of the uses.
+    if (match(V, m_Add(PatternMatch::m_Value(A), PatternMatch::m_Value(B))))
+      return isFreeToInvert(A, A->hasOneUse(), Depth) ||
+             isFreeToInvert(B, B->hasOneUse(), Depth);
+
+    // If `V` is of the form `A - B` then `-1 - V` can be folded into
+    // `~A + B` if we are willing to invert all of the uses.
+    if (match(V, m_Sub(PatternMatch::m_Value(A), PatternMatch::m_Value())))
+      return isFreeToInvert(A, A->hasOneUse(), Depth);
+
+    // Selects/min/max with invertible operands are freely invertible
+    if (match(V, m_Select(PatternMatch::m_Value(), PatternMatch::m_Value(A),
+                          PatternMatch::m_Value(B))) ||
+        match(V,
+              m_MaxOrMin(PatternMatch::m_Value(A), PatternMatch::m_Value(B))))
+      return isFreeToInvert(A, A->hasOneUse(), Depth) &&
+             isFreeToInvert(B, B->hasOneUse(), Depth);
 
     return false;
   }
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 7a4da66ae2151c8..9ed25b358c887f1 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -1494,14 +1494,13 @@ define i8 @freeToInvert_two_minmax_ops_use3(i8 %x, i8 %y, i8 %z, i8 %w) {
 
 define i8 @sub_not_min_max(i8 %r, i8 %g, i8 %b) {
 ; CHECK-LABEL: @sub_not_min_max(
-; CHECK-NEXT:    [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
 ; CHECK-NEXT:    [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTG]])
 ; CHECK-NEXT:    [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTB]])
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
-; CHECK-NEXT:    [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
-; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[B]])
+; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
 ; CHECK-NEXT:    ret i8 [[CK]]
 ;
   %notr = xor i8 %r, -1
@@ -1523,9 +1522,9 @@ define i8 @sub_not_min_max_uses1(i8 %r, i8 %g, i8 %b) {
 ; CHECK-NEXT:    call void @use(i8 [[NOTG]])
 ; CHECK-NEXT:    [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTB]])
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
-; CHECK-NEXT:    [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
-; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R]], i8 [[G]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[B]])
+; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
 ; CHECK-NEXT:    ret i8 [[CK]]
 ;
   %notr = xor i8 %r, -1
diff --git a/llvm/test/Transforms/InstCombine/pr63791.ll b/llvm/test/Transforms/InstCombine/pr63791.ll
index a489b2e3e6221a0..0fac19d583fe96c 100644
--- a/llvm/test/Transforms/InstCombine/pr63791.ll
+++ b/llvm/test/Transforms/InstCombine/pr63791.ll
@@ -17,7 +17,7 @@ define void @y() {
 ; CHECK:       for.cond5.preheader.i:
 ; CHECK-NEXT:    br i1 false, label [[FOR_INC19_I:%.*]], label [[FOR_COND1_LOOPEXIT_I:%.*]]
 ; CHECK:       for.inc19.i:
-; CHECK-NEXT:    br i1 false, label [[FOR_INC19_I]], label [[FOR_COND1_LOOPEXIT_I]]
+; CHECK-NEXT:    br i1 true, label [[FOR_COND1_LOOPEXIT_I]], label [[FOR_INC19_I]]
 ;
 entry:
   br label %for.cond.i
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 18b05c05d9b9d21..305da23709fa5e7 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1292,7 +1292,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[INDEX]], 3
@@ -1353,23 +1353,24 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer
-; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP44]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP46:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP46]]
-; CHECK-NEXT:    [[TMP48:%.*]] = bitcast <4 x i1> [[TMP47]] to i4
-; CHECK-NEXT:    [[TMP49:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP48]]), !range [[RNG42:![0-9]+]]
-; CHECK-NEXT:    [[TMP50:%.*]] = zext i4 [[TMP49]] to i32
-; CHECK-NEXT:    [[TMP51]] = add i32 [[VEC_PHI]], [[TMP50]]
+; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq <4 x i32> [[TMP43]], zeroinitializer
+; CHECK-NEXT:    [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP44]]
+; CHECK-NEXT:    [[DOTNOT7:%.*]] = and <4 x i1> [[TMP45]], [[TMP19]]
+; CHECK-NEXT:    [[NOT__NOT7:%.*]] = xor <4 x i1> [[DOTNOT7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP46:%.*]] = bitcast <4 x i1> [[NOT__NOT7]] to i4
+; CHECK-NEXT:    [[TMP47:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP46]]), !range [[RNG42:![0-9]+]]
+; CHECK-NEXT:    [[TMP48:%.*]] = zext i4 [[TMP47]] to i32
+; CHECK-NEXT:    [[TMP49]] = add i32 [[VEC_PHI]], [[TMP48]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]]

>From e7bd0e174eb53df002c9f910390d310c99c0612e Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Mon, 18 Sep 2023 14:59:58 -0500
Subject: [PATCH 02/13] [InstCombine] Add additional tests for free inversion;
 NFC

---
 .../Transforms/InstCombine/free-inversion.ll  | 403 ++++++++++++++++++
 1 file changed, 403 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/free-inversion.ll

diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
new file mode 100644
index 000000000000000..a2931fb9a081109
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i8 @llvm.smin.i8(i8, i8)
+declare i8 @llvm.umin.i8(i8, i8)
+declare i8 @llvm.smax.i8(i8, i8)
+declare i8 @llvm.umax.i8(i8, i8)
+
+declare void @use.i8(i8)
+
+define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = xor i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @xor_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = xor i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @xor_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[Y:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %b = select i1 %c, i8 %nx, i8 %y
+  %ab = xor i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = add i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = add i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @add_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = add i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @add_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, %a
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = add i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @sub_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = sub i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[B_NEG_V:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[B_NEG:%.*]] = add i8 [[B_NEG_V]], 1
+; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B_NEG]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = sub i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    call void @use.i8(i8 [[NX]])
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  call void @use.i8(i8 %nx)
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = sub i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @ashr_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @ashr_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = ashr i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = ashr i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @ashr_2_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @ashr_2_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = ashr i8 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = ashr i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @select_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[TMP1]], 45
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i8 [[A]], i8 [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, 45
+  %a = xor i8 %aa, %nna
+  %ab = select i1 %cc, i8 %a, i8 %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @select_2(i1 %cc, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @select_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[NA:%.*]], 45
+; CHECK-NEXT:    [[BA:%.*]] = select i1 [[CC:%.*]], i8 [[B]], i8 [[A]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %a = xor i8 %na, 45
+  %ba = select i1 %cc, i8 %b, i8 %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i1 @select_logic_or_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
+; CHECK-LABEL: @select_logic_or_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[YY:%.*]] = icmp eq i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i1 [[NX]], i1 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i1 [[B]], i1 false
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i1 [[AB]], true
+; CHECK-NEXT:    ret i1 [[NOT_AB]]
+;
+  %nx = xor i1 %x, -1
+  %yy = icmp eq i8 %y, 123
+  %b = select i1 %c, i1 %nx, i1 %yy
+  %ab = select i1 %cc, i1 %b, i1 false
+  %not_ab = xor i1 %ab, -1
+  ret i1 %not_ab
+}
+
+define i1 @select_logic_and_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
+; CHECK-LABEL: @select_logic_and_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[YY:%.*]] = icmp eq i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i1 [[NX]], i1 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i1 true, i1 [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i1 [[AB]], true
+; CHECK-NEXT:    ret i1 [[NOT_AB]]
+;
+  %nx = xor i1 %x, -1
+  %yy = icmp eq i8 %y, 123
+  %b = select i1 %c, i1 %nx, i1 %yy
+  %ab = select i1 %cc, i1 true, i1 %b
+  %not_ab = xor i1 %ab, -1
+  ret i1 %not_ab
+}
+
+define i8 @smin_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smin_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = add i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = add i8 %aa, %nna
+  %ab = call i8 @llvm.smin.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @smin_1_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smin_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A:%.*]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = call i8 @llvm.smin.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @umin_1_fail(i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[Y:%.*]]
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %b = select i1 %c, i8 %nx, i8 %y
+  %ba = call i8 @llvm.umin.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @smax_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smax_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = sub i8 %nna, %aa
+  %ab = call i8 @llvm.smax.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @smax_1_fail(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smax_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    call void @use.i8(i8 [[YY]])
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  call void @use.i8(i8 %yy)
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = sub i8 %nna, %aa
+  %ab = call i8 @llvm.smax.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @umax_1(i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umax_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = call i8 @llvm.umax.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @umax_1_fail(i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umax_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    call void @use.i8(i8 [[B]])
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  call void @use.i8(i8 %b)
+  %ba = call i8 @llvm.umax.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}

>From 4ee977f6b7f964d92844b41f400e9b3431637e81 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Mon, 18 Sep 2023 15:12:51 -0500
Subject: [PATCH 03/13] [InstCombine] Add `getFreeInverted` to perform folds
 for free inversion of op

With the current logic of `if (isFreeToInvert(Op)) return Not(Op)`, it is
fairly easy to either 1) cause regressions or 2) create infinite loops if
the folds we have for `Not(Op)` ever de-sync from the cases we know are
freely invertible.

This patch adds `getFreeInverted`, which builds the freely inverted op
alongside the check for free inversion, alleviating this problem.
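
As a small example of a fold this enables (illustrative IR, not taken from
this patch's tests), `~(a ^ ~b)` can be built directly as `a ^ b`:

  define i8 @not_of_xor_not(i8 %a, i8 %b) {
    %nb  = xor i8 %b, -1
    %ab  = xor i8 %a, %nb
    %not = xor i8 %ab, -1   ; ~(%a ^ ~%b)
    ret i8 %not             ; simplifies to: xor i8 %a, %b
  }

Because `getFreeInverted` constructs the inverted value with the same logic
that decides invertibility, the two can no longer de-sync.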
---
 .../Transforms/InstCombine/InstCombiner.h     | 128 +++++++++++++-----
 .../InstCombine/InstCombineAndOrXor.cpp       |   5 +
 .../Transforms/InstCombine/free-inversion.ll  | 110 ++++++---------
 .../Transforms/InstCombine/icmp-of-or-x.ll    |   6 +-
 llvm/test/Transforms/InstCombine/pr63791.ll   |   2 +-
 .../LoopVectorize/ARM/mve-selectandorcost.ll  |   2 +-
 .../LoopVectorize/reduction-inloop.ll         |  25 ++--
 7 files changed, 158 insertions(+), 120 deletions(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index 3a1ac917da6a6cb..a2e3c176cfab5df 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -233,56 +233,116 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
                                                 PatternMatch::m_Value()));
   }
 
-  /// Return true if the specified value is free to invert (apply ~ to).
-  /// This happens in cases where the ~ can be eliminated.  If WillInvertAllUses
-  /// is true, work under the assumption that the caller intends to remove all
-  /// uses of V and only keep uses of ~V.
-  ///
-  /// See also: canFreelyInvertAllUsersOf()
-  static bool isFreeToInvert(Value *V, bool WillInvertAllUses,
-                             unsigned Depth = 0) {
+  /// Return a non-null value if V is free to invert (subject to
+  /// WillInvertAllUses).
+  /// If Builder is non-null, this returns a simplified ~V.
+  /// If Builder is null, this returns an arbitrary non-null value (not
+  /// dereferenceable).
+  static Value *getFreeInverted(Value *V, bool WillInvertAllUses,
+                                BuilderTy *Builder, unsigned Depth = 0) {
+    static Value *const kNonNull = reinterpret_cast<Value *>(uintptr_t(1));
     // ~(~(X)) -> X.
-    if (match(V, m_Not(PatternMatch::m_Value())))
-      return true;
+    Value *A, *B;
+    if (match(V, m_Not(PatternMatch::m_Value(A))))
+      return A;
 
+    Constant *C;
     // Constants can be considered to be not'ed values.
-    if (match(V, PatternMatch::m_AnyIntegralConstant()))
-      return true;
+    if (match(V, PatternMatch::m_ImmConstant(C)))
+      return ConstantExpr::getNot(C);
 
     if (Depth++ >= MaxAnalysisRecursionDepth)
-      return false;
+      return nullptr;
 
     // The rest of the cases require that we invert all uses so don't bother
     // doing the analysis if we know we can't use the result.
     if (!WillInvertAllUses)
-      return false;
+      return nullptr;
 
     // Compares can be inverted if all of their uses are being modified to use
     // the ~V.
-    if (isa<CmpInst>(V))
-      return true;
+    if (auto *I = dyn_cast<CmpInst>(V)) {
+      if (Builder != nullptr)
+        return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
+                                  I->getOperand(1));
+      return kNonNull;
+    }
 
-    Value *A, *B;
-    // If `V` is of the form `A + B` then `-1 - V` can be folded into
-    // `~B - A` or `~A - B` if we are willing to invert all of the uses.
-    if (match(V, m_Add(PatternMatch::m_Value(A), PatternMatch::m_Value(B))))
-      return isFreeToInvert(A, A->hasOneUse(), Depth) ||
-             isFreeToInvert(B, B->hasOneUse(), Depth);
+    // If `V` is of the form `A + B` then `-1 - V` can be folded into
+    // `~B - A` or `~A - B` if we are willing to invert all of the uses.
+    if (match(V, m_Add(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) {
+      if (auto *BV = getFreeInverted(B, B->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateSub(BV, A) : kNonNull;
+      if (auto *AV = getFreeInverted(A, A->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateSub(AV, B) : kNonNull;
+      return nullptr;
+    }
 
-    // If `V` is of the form `A - B` then `-1 - V` can be folded into
-    // `~A + B` if we are willing to invert all of the uses.
-    if (match(V, m_Sub(PatternMatch::m_Value(A), PatternMatch::m_Value())))
-      return isFreeToInvert(A, A->hasOneUse(), Depth);
+    // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
+    // into `A ^ B` if we are willing to invert all of the uses.
+    if (match(V, m_Xor(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) {
+      if (auto *BV = getFreeInverted(B, B->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateXor(A, BV) : kNonNull;
+      if (auto *AV = getFreeInverted(A, A->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateXor(AV, B) : kNonNull;
+      return nullptr;
+    }
 
+    // If `V` is of the form `A - B` then `-1 - V` can be folded into
+    // `~A + B` if we are willing to invert all of the uses.
+    if (match(V, m_Sub(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) {
+      if (auto *AV = getFreeInverted(A, A->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateAdd(AV, B) : kNonNull;
+      return nullptr;
+    }
+
+    // If `V` is of the form `A >> B` (ashr) then `-1 - V` can be folded into
+    // `~A >> B` if we are willing to invert all of the uses.
+    if (match(V, m_AShr(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) {
+      if (auto *AV = getFreeInverted(A, A->hasOneUse(), Builder, Depth))
+        return Builder ? Builder->CreateAShr(AV, B) : kNonNull;
+      return nullptr;
+    }
+
+    Value *Cond;
+    // LogicOps are special in that we canonicalize them at the cost of an
+    // instruction.
+    bool IsSelect =
+        match(V, m_Select(PatternMatch::m_Value(Cond), PatternMatch::m_Value(A),
+                          PatternMatch::m_Value(B))) &&
+        !match(V, PatternMatch::m_LogicalOp(PatternMatch::m_Value(),
+                                            PatternMatch::m_Value()));
     // Selects/min/max with invertible operands are freely invertible
-    if (match(V, m_Select(PatternMatch::m_Value(), PatternMatch::m_Value(A),
-                          PatternMatch::m_Value(B))) ||
-        match(V,
-              m_MaxOrMin(PatternMatch::m_Value(A), PatternMatch::m_Value(B))))
-      return isFreeToInvert(A, A->hasOneUse(), Depth) &&
-             isFreeToInvert(B, B->hasOneUse(), Depth);
-
-    return false;
+    if (IsSelect || match(V, m_MaxOrMin(PatternMatch::m_Value(A),
+                                        PatternMatch::m_Value(B)))) {
+      if (!getFreeInverted(A, A->hasOneUse(), /*Builder*/ nullptr, Depth) ||
+          !getFreeInverted(B, B->hasOneUse(), /*Builder*/ nullptr, Depth))
+        return nullptr;
+      if (Builder != nullptr) {
+        A = getFreeInverted(A, A->hasOneUse(), Builder, Depth);
+        B = getFreeInverted(B, B->hasOneUse(), Builder, Depth);
+        assert(
+            A != nullptr && B != nullptr &&
+            "Unable to build inverted values for known freely invertable ops");
+        if (auto *II = dyn_cast<IntrinsicInst>(V))
+          return Builder->CreateBinaryIntrinsic(II->getIntrinsicID(), A, B);
+        return Builder->CreateSelect(Cond, A, B);
+      }
+      return kNonNull;
+    }
+
+    return nullptr;
+  }
+
+  /// Return true if the specified value is free to invert (apply ~ to).
+  /// This happens in cases where the ~ can be eliminated.  If WillInvertAllUses
+  /// is true, work under the assumption that the caller intends to remove all
+  /// uses of V and only keep uses of ~V.
+  ///
+  /// See also: canFreelyInvertAllUsersOf()
+  static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
+    return getFreeInverted(V, WillInvertAllUses, /*Builder*/ nullptr,
+                           /*Depth*/ 0) != nullptr;
   }
 
   /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3e0218d9b76d1f7..5a3d6dd20d60a08 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -4410,6 +4410,11 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
   if (Instruction *NewXor = foldNotXor(I, Builder))
     return NewXor;
 
+  // TODO: Could handle multi-use better by checking if all uses of NotOp (other
+  // than I) can be inverted.
+  if (Value *R = getFreeInverted(NotOp, NotOp->hasOneUse(), &Builder))
+    return replaceInstUsesWith(I, R);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index a2931fb9a081109..0d694d3f72e3afa 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -10,11 +10,9 @@ declare void @use.i8(i8)
 
 define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = xor i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -27,11 +25,9 @@ define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @xor_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -59,11 +55,9 @@ define i8 @xor_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @add_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = add i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = sub i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -76,11 +70,9 @@ define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @add_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @add_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = sub i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -110,11 +102,9 @@ define i8 @add_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @sub_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
-; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -129,9 +119,8 @@ define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
 ; CHECK-NEXT:    [[B_NEG_V:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = add i8 [[B_NEG_V]], 1
-; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B_NEG]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[B_NEG_V]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = sub i8 -2, [[TMP2]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -145,11 +134,10 @@ define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_fail(
 ; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
 ; CHECK-NEXT:    call void @use.i8(i8 [[NX]])
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
-; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -163,11 +151,9 @@ define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @ashr_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = ashr i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = ashr i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -197,13 +183,11 @@ define i8 @ashr_2_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @select_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[NA:%.*]], [[AA:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[TMP1]], 45
-; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i8 [[A]], i8 [[B]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[TMP1]], -46
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP3]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = select i1 [[CC:%.*]], i8 [[TMP2]], i8 [[TMP4]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -218,12 +202,10 @@ define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
 
 define i8 @select_2(i1 %cc, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @select_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[NA:%.*]], 45
-; CHECK-NEXT:    [[BA:%.*]] = select i1 [[CC:%.*]], i8 [[B]], i8 [[A]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[NA:%.*]], -46
+; CHECK-NEXT:    [[NOT_BA:%.*]] = select i1 [[CC:%.*]], i8 [[TMP2]], i8 [[TMP3]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -271,13 +253,10 @@ define i1 @select_logic_and_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
 
 define i8 @smin_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @smin_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[NNA]], [[AA:%.*]]
-; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A]], i8 [[B]])
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP2]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[TMP3]])
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -324,13 +303,10 @@ define i8 @umin_1_fail(i1 %c, i8 %x, i8 %y) {
 
 define i8 @smax_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @smax_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
-; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP2]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP1]], i8 [[TMP3]])
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -368,11 +344,9 @@ define i8 @smax_1_fail(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 
 define i8 @umax_1(i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = call i8 @llvm.umin.i8(i8 [[TMP2]], i8 -86)
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 839aa98a8b24e2f..4b8df439b846fb6 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -121,9 +121,9 @@ define i1 @or_ne_notY_eq_1s(i8 %x, i8 %y) {
 
 define i1 @or_ne_notY_eq_1s_fail_bad_not(i8 %x, i8 %y) {
 ; CHECK-LABEL: @or_ne_notY_eq_1s_fail_bad_not(
-; CHECK-NEXT:    [[NY:%.*]] = xor i8 [[Y:%.*]], -2
-; CHECK-NEXT:    [[OR:%.*]] = or i8 [[NY]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[OR]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %ny = xor i8 %y, -2
diff --git a/llvm/test/Transforms/InstCombine/pr63791.ll b/llvm/test/Transforms/InstCombine/pr63791.ll
index 0fac19d583fe96c..a489b2e3e6221a0 100644
--- a/llvm/test/Transforms/InstCombine/pr63791.ll
+++ b/llvm/test/Transforms/InstCombine/pr63791.ll
@@ -17,7 +17,7 @@ define void @y() {
 ; CHECK:       for.cond5.preheader.i:
 ; CHECK-NEXT:    br i1 false, label [[FOR_INC19_I:%.*]], label [[FOR_COND1_LOOPEXIT_I:%.*]]
 ; CHECK:       for.inc19.i:
-; CHECK-NEXT:    br i1 true, label [[FOR_COND1_LOOPEXIT_I]], label [[FOR_INC19_I]]
+; CHECK-NEXT:    br i1 false, label [[FOR_INC19_I]], label [[FOR_COND1_LOOPEXIT_I]]
 ;
 entry:
   br label %for.cond.i
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index 5d85a4cd73fddfd..c4d1607ff640af5 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -86,7 +86,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
 ; CHECK-NEXT:    [[ACCUM_1]] = phi float [ [[ADD4]], [[IF_THEN]] ], [ [[ACCUM_017]], [[WHILE_BODY]] ]
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLOCKSIZE_ADDR_018]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    [[ACCUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ACCUM_1]], [[IF_END]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret float [[ACCUM_0_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 305da23709fa5e7..18b05c05d9b9d21 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1292,7 +1292,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[INDEX]], 3
@@ -1353,24 +1353,23 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq <4 x i32> [[TMP43]], zeroinitializer
-; CHECK-NEXT:    [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP44]]
-; CHECK-NEXT:    [[DOTNOT7:%.*]] = and <4 x i1> [[TMP45]], [[TMP19]]
-; CHECK-NEXT:    [[NOT__NOT7:%.*]] = xor <4 x i1> [[DOTNOT7]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP46:%.*]] = bitcast <4 x i1> [[NOT__NOT7]] to i4
-; CHECK-NEXT:    [[TMP47:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP46]]), !range [[RNG42:![0-9]+]]
-; CHECK-NEXT:    [[TMP48:%.*]] = zext i4 [[TMP47]] to i32
-; CHECK-NEXT:    [[TMP49]] = add i32 [[VEC_PHI]], [[TMP48]]
+; CHECK-NEXT:    [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer
+; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP44]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP46:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP46]]
+; CHECK-NEXT:    [[TMP48:%.*]] = bitcast <4 x i1> [[TMP47]] to i4
+; CHECK-NEXT:    [[TMP49:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP48]]), !range [[RNG42:![0-9]+]]
+; CHECK-NEXT:    [[TMP50:%.*]] = zext i4 [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP51]] = add i32 [[VEC_PHI]], [[TMP50]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]]

>From 56ed3e9ba9ed99fb2229b111449e42fd9b46ef1d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:49 -0500
Subject: [PATCH 04/13] [InstCombine] Add tests for expanding
 `foldICmpWithLowBitMaskedVal`; NFC

Differential Revision: https://reviews.llvm.org/D159057
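
The pattern under test is of this general form (illustrative IR): a low-bit
mask built from a shift, used to check whether `x` fits within the mask:

  define i1 @x_within_lowbit_mask(i8 %x, i8 %y) {
    %mask = lshr i8 -1, %y        ; low-bit mask: 0...011...1
    %and  = and i8 %x, %mask
    %r    = icmp eq i8 %and, %x   ; true iff no bits of %x lie outside %mask
    ret i1 %r
  }

Since the mask has only contiguous low bits set, this is equivalent to an
unsigned `%x u<= %mask` comparison, which is the kind of fold these tests
exercise.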
---
 .../InstCombine/icmp-and-lowbit-mask.ll       | 651 ++++++++++++++++++
 1 file changed, 651 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll

diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
new file mode 100644
index 000000000000000..d815183577bbcbf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -0,0 +1,651 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare void @llvm.assume(i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i8 @llvm.umin.i8(i8, i8)
+declare i8 @llvm.umax.i8(i8, i8)
+declare i8 @llvm.smin.i8(i8, i8)
+declare i8 @llvm.smax.i8(i8, i8)
+declare i8 @llvm.bitreverse.i8(i8)
+declare void @use.i8(i8)
+declare void @use.i16(i16)
+define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_zext(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = lshr i8 -1, %y
+  %mask = zext i8 %m_in to i16
+
+  %and = and i16 %x, %mask
+  %r = icmp eq i16 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_zext_fail_not_mask(i16 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_zext_fail_not_mask(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 -2, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = lshr i8 -2, %y
+  %mask = zext i8 %m_in to i16
+
+  %and = and i16 %x, %mask
+  %r = icmp eq i16 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_sext(i16 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_sext(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 31, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
+; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i16 [[MASK]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = lshr i8 31, %y
+  %mask = sext i8 %m_in to i16
+  %notmask = xor i16 %mask, -1
+
+  %and = and i16 %notmask, %x
+  %r = icmp eq i16 %and, 0
+  ret i1 %r
+}
+
+define i1 @src_is_mask_sext_fail_multiuse(i16 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_sext_fail_multiuse(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 122
+; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[M_IN]], -1
+; CHECK-NEXT:    [[NOTMASK:%.*]] = sext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    call void @use.i16(i16 [[AND]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = lshr i8 -1, %y
+  %mask = sext i8 %m_in to i16
+  %notmask = xor i16 %mask, -1
+
+  %and = and i16 %notmask, %x
+  call void @use.i16(i16 %and)
+  %r = icmp eq i16 %and, 0
+  ret i1 %r
+}
+
+define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
+; CHECK-LABEL: @src_is_mask_and(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[MY:%.*]] = lshr i8 7, [[Y:%.*]]
+; CHECK-NEXT:    [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %my = ashr i8 7, %y
+  %mz = lshr i8 -1, %z
+  %mask = and i8 %my, %mz
+
+  %and = and i8 %x, %mask
+  %r = icmp eq i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) {
+; CHECK-LABEL: @src_is_mask_and_fail_mixed(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[MY:%.*]] = ashr i8 -8, [[Y:%.*]]
+; CHECK-NEXT:    [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %my = ashr i8 -8, %y
+  %mz = lshr i8 -1, %z
+  %mask = and i8 %my, %mz
+
+  %and = and i8 %x, %mask
+  %r = icmp eq i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_or(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[MY:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], 7
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %my = lshr i8 -1, %y
+  %mask = and i8 %my, 7
+
+  %and = and i8 %mask, %x
+  %r = icmp eq i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_xor(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %mask = xor i8 %y, %y_m1
+  %and = and i8 %x, %mask
+  %r = icmp ne i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_xor_fail_notmask(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1_NOT:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[Y_M1_NOT]], [[Y]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %mask = xor i8 %y, %y_m1
+  %notmask = xor i8 %mask, -1
+  %and = and i8 %x, %notmask
+  %r = icmp ne i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_mask_select(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = select i1 %cond, i8 %ymask, i8 15
+
+  %and = and i8 %mask, %x
+  %r = icmp ne i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_select_fail_wrong_pattern(i8 %x_in, i8 %y, i1 %cond, i8 %z) {
+; CHECK-LABEL: @src_is_mask_select_fail_wrong_pattern(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = select i1 %cond, i8 %ymask, i8 15
+
+  %and = and i8 %mask, %x
+  %r = icmp ne i8 %and, %z
+  ret i1 %r
+}
+
+define i1 @src_is_mask_shl_lshr(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_mask_shl_lshr(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[TMP1]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %m_shl = shl i8 -1, %y
+  %mask = lshr i8 %m_shl, %y
+  %notmask = xor i8 %mask, -1
+
+  %and = and i8 %x, %notmask
+  %r = icmp ne i8 0, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_shl_lshr_fail_not_allones(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_mask_shl_lshr_fail_not_allones(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[TMP1]], -2
+; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[MASK]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %m_shl = shl i8 -2, %y
+  %mask = lshr i8 %m_shl, %y
+  %notmask = xor i8 %mask, -1
+
+  %and = and i8 %x, %notmask
+  %r = icmp ne i8 0, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
+; CHECK-LABEL: @src_is_mask_lshr(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
+; CHECK-NEXT:    [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %smask = select i1 %cond, i8 %ymask, i8 15
+  %mask = lshr i8 %smask, %z
+  %and = and i8 %mask, %x
+  %r = icmp ne i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
+; CHECK-LABEL: @src_is_mask_ashr(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
+; CHECK-NEXT:    [[MASK:%.*]] = ashr i8 [[SMASK]], [[Z:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %smask = select i1 %cond, i8 %ymask, i8 15
+  %mask = ashr i8 %smask, %z
+  %and = and i8 %x, %mask
+  %r = icmp ult i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_p2_m1(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = add i8 [[P2ORZ]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %p2orz = shl i8 2, %y
+  %mask = add i8 %p2orz, -1
+  %and = and i8 %mask, %x
+  %r = icmp ult i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_umax(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3)
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = call i8 @llvm.umax.i8(i8 %ymask, i8 3)
+
+  %and = and i8 %x, %mask
+  %r = icmp ugt i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
+; CHECK-LABEL: @src_is_mask_umin(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %zmask = lshr i8 15, %z
+  %mask = call i8 @llvm.umin.i8(i8 %ymask, i8 %zmask)
+
+  %and = and i8 %mask, %x
+  %r = icmp ugt i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_umin_fail_mismatch(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 -32)
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = call i8 @llvm.umin.i8(i8 %ymask, i8 -32)
+
+  %and = and i8 %mask, %x
+  %r = icmp ugt i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_smax(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1)
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = call i8 @llvm.smax.i8(i8 %ymask, i8 -1)
+
+  %and = and i8 %x, %mask
+  %r = icmp uge i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_smin(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0)
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %y_m1 = add i8 %y, -1
+  %ymask = xor i8 %y, %y_m1
+  %mask = call i8 @llvm.smin.i8(i8 %ymask, i8 0)
+
+  %and = and i8 %mask, %x
+  %r = icmp uge i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_mask_bitreverse_not_mask(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %nmask = shl i8 -1, %y
+  %mask = call i8 @llvm.bitreverse.i8(i8 %nmask)
+
+  %and = and i8 %x, %mask
+  %r = icmp ule i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_sext(i16 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_notmask_sext(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[M_IN:%.*]] = shl i8 -8, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[M_IN]], -1
+; CHECK-NEXT:    [[MASK:%.*]] = sext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = shl i8 -8, %y
+  %nmask = sext i8 %m_in to i16
+  %mask = xor i16 %nmask, -1
+  %and = and i16 %mask, %x
+  %r = icmp ule i16 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_notmask_shl(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
+; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
+; CHECK-NEXT:    [[NOTMASK0:%.*]] = xor i8 [[MASK]], -1
+; CHECK-NEXT:    [[NOTMASK:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 -8
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %nmask = shl i8 -1, %y
+  %mask = call i8 @llvm.bitreverse.i8(i8 %nmask)
+  %notmask0 = xor i8 %mask, -1
+  %notmask = select i1 %cond, i8 %notmask0, i8 -8
+  %and = and i8 %x, %notmask
+  %r = icmp eq i8 %and, 0
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_shl_fail_multiuse_invert(i8 %x_in, i8 %y, i1 %cond) {
+; CHECK-LABEL: @src_is_notmask_shl_fail_multiuse_invert(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
+; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
+; CHECK-NEXT:    [[NOTMASK0:%.*]] = xor i8 [[MASK]], -1
+; CHECK-NEXT:    [[NOTMASK:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 -8
+; CHECK-NEXT:    call void @use.i8(i8 [[NOTMASK]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %nmask = shl i8 -1, %y
+  %mask = call i8 @llvm.bitreverse.i8(i8 %nmask)
+  %notmask0 = xor i8 %mask, -1
+  %notmask = select i1 %cond, i8 %notmask0, i8 -8
+  call void @use.i8(i8 %notmask)
+  %and = and i8 %x, %notmask
+  %r = icmp eq i8 %and, 0
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_lshr_shl(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_notmask_lshr_shl(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %mask_shr = lshr i8 -1, %y
+  %nmask = shl i8 %mask_shr, %y
+  %mask = xor i8 %nmask, -1
+  %and = and i8 %mask, %x
+  %r = icmp eq i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_lshr_shl_fail_mismatch_shifts(i8 %x_in, i8 %y, i8 %z) {
+; CHECK-LABEL: @src_is_notmask_lshr_shl_fail_mismatch_shifts(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[MASK_SHR:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 [[MASK_SHR]], [[Z:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = xor i8 [[NMASK]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %mask_shr = lshr i8 -1, %y
+  %nmask = shl i8 %mask_shr, %z
+  %mask = xor i8 %nmask, -1
+  %and = and i8 %mask, %x
+  %r = icmp eq i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_ashr(i16 %x_in, i8 %y, i16 %z) {
+; CHECK-LABEL: @src_is_notmask_ashr(
+; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[M_IN:%.*]] = shl i8 -32, [[Y:%.*]]
+; CHECK-NEXT:    [[NMASK:%.*]] = sext i8 [[M_IN]] to i16
+; CHECK-NEXT:    [[NMASK_SHR:%.*]] = ashr i16 [[NMASK]], [[Z:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = xor i16 [[NMASK_SHR]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i16 %x_in, 123
+  %m_in = shl i8 -32, %y
+  %nmask = sext i8 %m_in to i16
+  %nmask_shr = ashr i16 %nmask, %z
+  %mask = xor i16 %nmask_shr, -1
+  %and = and i16 %x, %mask
+  %r = icmp eq i16 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_neg_p2(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_notmask_neg_p2(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[NMASK:%.*]] = sub i8 0, [[P2]]
+; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
+; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[MASK]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %ny = sub i8 0, %y
+  %p2 = and i8 %ny, %y
+  %nmask = sub i8 0, %p2
+  %mask = call i8 @llvm.bitreverse.i8(i8 %nmask)
+  %notmask = xor i8 %mask, -1
+  %and = and i8 %notmask, %x
+  %r = icmp eq i8 0, %and
+  ret i1 %r
+}
+
+define i1 @src_is_notmask_neg_p2_fail_not_invertable(i8 %x_in, i8 %y) {
+; CHECK-LABEL: @src_is_notmask_neg_p2_fail_not_invertable(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[NOTMASK:%.*]] = sub i8 0, [[P2]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %ny = sub i8 0, %y
+  %p2 = and i8 %ny, %y
+  %notmask = sub i8 0, %p2
+  %and = and i8 %notmask, %x
+  %r = icmp eq i8 0, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_const_slt(i8 %x_in) {
+; CHECK-LABEL: @src_is_mask_const_slt(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 7
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[X]], [[AND]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %and = and i8 %x, 7
+  %r = icmp slt i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_const_sgt(i8 %x_in) {
+; CHECK-LABEL: @src_is_mask_const_sgt(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i8 [[X]], 7
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %and = and i8 %x, 7
+  %r = icmp sgt i8 %x, %and
+  ret i1 %r
+}
+
+define i1 @src_is_mask_const_sle(i8 %x_in) {
+; CHECK-LABEL: @src_is_mask_const_sle(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 31
+; CHECK-NEXT:    [[R:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %and = and i8 %x, 31
+  %r = icmp sle i8 %and, %x
+  ret i1 %r
+}
+
+define i1 @src_is_mask_const_sge(i8 %x_in) {
+; CHECK-LABEL: @src_is_mask_const_sge(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[X]], 32
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = xor i8 %x_in, 123
+  %and = and i8 %x, 31
+  %r = icmp sge i8 %and, %x
+  ret i1 %r
+}

>From d6c6cbc96268fd2c4e97393bf4a22b6e2d37e7d5 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:52 -0500
Subject: [PATCH 05/13] [InstCombine] Improve mask detection in
 `foldICmpWithLowBitMaskedVal`

Add a recursive matcher that can detect many more patterns.
Proofs for all supported patterns: https://alive2.llvm.org/ce/z/fSQ3nZ

Differential Revision: https://reviews.llvm.org/D159058
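
For illustration, a sketch distilled from the updated tests below (the
IR names come from the tests, not a fixed API): with the recursive
matcher, a select between two recognized masks is itself treated as a
mask, so

    %y_m1 = add i8 %y, -1
    %ymask = xor i8 %y_m1, %y                 ; (y - 1) ^ y is a low-bit mask
    %mask = select i1 %cond, i8 %ymask, i8 15 ; select of two masks is a mask
    %and = and i8 %mask, %x
    %r = icmp ne i8 %and, %x

can now fold to

    %r = icmp ugt i8 %x, %mask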
---
 llvm/include/llvm/IR/PatternMatch.h           |  25 +++
 .../InstCombine/InstCombineCompares.cpp       | 149 ++++++++++++++++--
 ...nt-low-bit-mask-and-icmp-eq-to-icmp-ule.ll |   3 +-
 ...nt-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll |   3 +-
 ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll |   3 +-
 ...t-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll |   3 +-
 ...t-low-bit-mask-and-icmp-sle-to-icmp-sle.ll |   3 +-
 ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll |   3 +-
 ...t-low-bit-mask-and-icmp-uge-to-icmp-ule.ll |   3 +-
 ...t-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll |   3 +-
 ...t-low-bit-mask-and-icmp-ule-to-icmp-ule.ll |   3 +-
 ...t-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll |   3 +-
 .../InstCombine/icmp-and-lowbit-mask.ll       |  45 ++----
 13 files changed, 184 insertions(+), 65 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 13877538f79de6d..0d50df19a326725 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -564,6 +564,19 @@ inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
   return V;
 }
 
+struct is_negated_power2_or_zero {
+  bool isValue(const APInt &C) { return !C || C.isNegatedPowerOf2(); }
+};
+/// Match an integer or vector negated power-of-2 or zero.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_negated_power2_or_zero> m_NegatedPower2OrZero() {
+  return cst_pred_ty<is_negated_power2_or_zero>();
+}
+inline api_pred_ty<is_negated_power2_or_zero>
+m_NegatedPower2OrZero(const APInt *&V) {
+  return V;
+}
+
 struct is_power2_or_zero {
   bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
 };
@@ -595,6 +608,18 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
 }
 inline api_pred_ty<is_lowbit_mask> m_LowBitMask(const APInt *&V) { return V; }
 
+struct is_lowbit_mask_or_zero {
+  bool isValue(const APInt &C) { return !C || C.isMask(); }
+};
+/// Match an integer or vector with only the low bit(s) set, or zero.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_lowbit_mask_or_zero> m_LowBitMaskOrZero() {
+  return cst_pred_ty<is_lowbit_mask_or_zero>();
+}
+inline api_pred_ty<is_lowbit_mask_or_zero> m_LowBitMaskOrZero(const APInt *&V) {
+  return V;
+}
+
 struct icmp_pred_with_threshold {
   ICmpInst::Predicate Pred;
   const APInt *Thr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 66e2b6c72cce46c..30e0e3e33e7ec6b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4054,6 +4054,109 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
   return nullptr;
 }
 
+// Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero).
+static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
+                         unsigned Depth = 0) {
+  if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
+    return true;
+  if (V->getType()->getScalarSizeInBits() == 1)
+    return true;
+  if (Depth++ >= MaxAnalysisRecursionDepth)
+    return false;
+  Value *X;
+  if (match(V, m_Not(m_Value(X))))
+    return isMaskOrZero(X, !Not, Q, Depth);
+  const Operator *I = dyn_cast<Operator>(V);
+  if (I == nullptr)
+    return false;
+  switch (I->getOpcode()) {
+  case Instruction::ZExt:
+    // ZExt(Mask) is a Mask.
+    return !Not && isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+  case Instruction::SExt:
+    // SExt(Mask) is a Mask.
+    // SExt(~Mask) is a ~Mask.
+    return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+  case Instruction::And:
+  case Instruction::Or:
+    // Mask0 | Mask1 is a Mask.
+    // Mask0 & Mask1 is a Mask.
+    // ~Mask0 | ~Mask1 is a ~Mask.
+    // ~Mask0 & ~Mask1 is a ~Mask.
+    return isMaskOrZero(I->getOperand(1), Not, Q, Depth) &&
+           isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+  case Instruction::Xor:
+    // (X ^ (X - 1)) is a Mask
+    return match(V, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())));
+  case Instruction::Select:
+    // c ? Mask0 : Mask1 is a Mask.
+    return isMaskOrZero(I->getOperand(1), Not, Q, Depth) &&
+           isMaskOrZero(I->getOperand(2), Not, Q, Depth);
+  case Instruction::Shl:
+    if (Not) {
+      // (-1 >> X) << X is ~Mask
+      if (match(I->getOperand(0),
+                m_Shr(m_AllOnes(), m_Specific(I->getOperand(1)))))
+        return true;
+
+      // (~Mask) << X is a ~Mask.
+      return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+    }
+    break;
+  case Instruction::LShr:
+    if (!Not) {
+      // (-1 << X) >> X is a Mask
+      if (match(I->getOperand(0),
+                m_Shl(m_AllOnes(), m_Specific(I->getOperand(1)))))
+        return true;
+      // Mask >> X is a Mask.
+      return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+    }
+    return false;
+  case Instruction::AShr:
+    // Mask s>> X is a Mask.
+    // ~Mask s>> X is a ~Mask.
+    return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
+  case Instruction::Add:
+    // Pow2 - 1 is a Mask.
+    if (!Not && match(I->getOperand(1), m_AllOnes()))
+      return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
+                                    Depth, Q.AC, Q.CxtI, Q.DT);
+    break;
+  case Instruction::Sub:
+    // -Pow2 is a ~Mask.
+    if (Not && match(I->getOperand(0), m_Zero()))
+      return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
+                                    Depth, Q.AC, Q.CxtI, Q.DT);
+    break;
+  case Instruction::Invoke:
+  case Instruction::Call: {
+    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+        // min/max(Mask0, Mask1) is a Mask.
+        // min/max(~Mask0, ~Mask1) is a ~Mask.
+      case Intrinsic::umax:
+      case Intrinsic::smax:
+      case Intrinsic::umin:
+      case Intrinsic::smin:
+        return isMaskOrZero(II->getArgOperand(1), Not, Q, Depth) &&
+               isMaskOrZero(II->getArgOperand(0), Not, Q, Depth);
+
+        // In the context of masks, bitreverse(Mask) == ~Mask
+      case Intrinsic::bitreverse:
+        return isMaskOrZero(II->getArgOperand(0), !Not, Q, Depth);
+      default:
+        break;
+      }
+    }
+    break;
+  }
+  default:
+    break;
+  }
+  return false;
+}
+
 /// Some comparisons can be simplified.
 /// In this case, we are looking for comparisons that look like
 /// a check for a lossy truncation.
@@ -4067,21 +4170,35 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
 /// The Mask can be a constant, too.
 /// For some predicates, the operands are commutative.
 /// For others, x can only be on a specific side.
-static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
-                                          InstCombiner::BuilderTy &Builder) {
+static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, const SimplifyQuery &Q,
+                                          InstCombiner &IC) {
+
+  Value *X, *M;
+  ICmpInst::Predicate Pred = I.getPredicate();
   ICmpInst::Predicate SrcPred;
-  Value *X, *M, *Y;
-  auto m_VariableMask = m_CombineOr(
-      m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
-                  m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
-      m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
-                  m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
-  auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
-  if (!match(&I, m_c_ICmp(SrcPred,
-                          m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
-                          m_Deferred(X))))
-    return nullptr;
+  bool NeedsNot = false;
+
+  auto CheckMask = [&](Value *V, bool Not) {
+    if (!ICmpInst::isSigned(Pred))
+      return isMaskOrZero(V, Not, Q);
+    return Not ? match(V, m_NegatedPower2OrZero())
+               : match(V, m_LowBitMaskOrZero());
+  };
 
+  auto TryMatch = [&](unsigned OpNo) {
+    SrcPred = Pred;
+    if (match(I.getOperand(OpNo),
+              m_c_And(m_Specific(I.getOperand(1 - OpNo)), m_Value(M)))) {
+      X = I.getOperand(1 - OpNo);
+      if (OpNo)
+        SrcPred = ICmpInst::getSwappedPredicate(Pred);
+      return CheckMask(M, /*Not*/ false);
+    }
+    return false;
+  };
+
+  if (!TryMatch(0) && !TryMatch(1))
+    return nullptr;
   ICmpInst::Predicate DstPred;
   switch (SrcPred) {
   case ICmpInst::Predicate::ICMP_EQ:
@@ -4149,7 +4266,9 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
     M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
   }
 
-  return Builder.CreateICmp(DstPred, X, M);
+  if (NeedsNot)
+    M = IC.Builder.CreateNot(M);
+  return IC.Builder.CreateICmp(DstPred, X, M);
 }
 
 /// Some comparisons can be simplified.
@@ -5061,7 +5180,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
   if (Value *V = foldMultiplicationOverflowCheck(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
+  if (Value *V = foldICmpWithLowBitMaskedVal(I, Q, *this))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *R = foldICmpAndXX(I, Q, *this))
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
index a957fb2d088ef46..5b7a99d53c308ca 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 1>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
index 57361cdf38977c1..160d968b9ac4c77 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 0>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
index 0dfc9f51baf9c26..60921042d524356 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -50,8 +50,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 0>
-; CHECK-NEXT:    [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 1>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
index e0893ce4cf2ecb1..6345e70d7220e29 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -63,8 +63,7 @@ define <2 x i1> @p2_vec_nonsplat() {
 define <2 x i1> @p2_vec_nonsplat_edgecase() {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 0>
-; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 0>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
index 81887a390915730..b7aec53fed6760f 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -63,8 +63,7 @@ define <2 x i1> @p2_vec_nonsplat() {
 define <2 x i1> @p2_vec_nonsplat_edgecase() {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 0>
-; CHECK-NEXT:    [[RET:%.*]] = icmp sle <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 1>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
index 8ce8687f1984462..56661d335c4f602 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -50,8 +50,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 0>
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 0>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
index ff09e255185b5ad..a93e8f779435fc8 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 1>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
index 4ad04710fd7bb96..73ea4d456d2462e 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
@@ -75,8 +75,7 @@ define <2 x i1> @p2_vec_nonsplat() {
 define <2 x i1> @p2_vec_nonsplat_edgecase0() {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 0>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
index 8e513dcbf4ef3a3..53886b5f2dc9c33 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
@@ -75,8 +75,7 @@ define <2 x i1> @p2_vec_nonsplat() {
 define <2 x i1> @p2_vec_nonsplat_edgecase0() {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 1>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
index d02ecf6965e878f..d66be571008c2f7 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 
 define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
 ; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 0>
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 0>
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index d815183577bbcbf..c1990c3e061154c 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -15,8 +15,7 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i16 %x_in, 123
@@ -94,8 +93,7 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
 ; CHECK-NEXT:    [[MY:%.*]] = lshr i8 7, [[Y:%.*]]
 ; CHECK-NEXT:    [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -133,8 +131,7 @@ define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[MY:%.*]] = lshr i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], 7
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -151,8 +148,7 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -187,8 +183,7 @@ define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -267,8 +262,7 @@ define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
 ; CHECK-NEXT:    [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[X]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -288,8 +282,7 @@ define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
 ; CHECK-NEXT:    [[MASK:%.*]] = ashr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -307,8 +300,7 @@ define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = add i8 [[P2ORZ]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -325,8 +317,7 @@ define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3)
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -346,8 +337,7 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]])
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -387,8 +377,7 @@ define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1)
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -407,8 +396,7 @@ define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0)
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -426,8 +414,7 @@ define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -445,8 +432,7 @@ define i1 @src_is_notmask_sext(i16 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[M_IN:%.*]] = shl i8 -8, [[Y:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[M_IN]], -1
 ; CHECK-NEXT:    [[MASK:%.*]] = sext i8 [[TMP1]] to i16
-; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i16 %x_in, 123
@@ -544,8 +530,7 @@ define i1 @src_is_notmask_ashr(i16 %x_in, i8 %y, i16 %z) {
 ; CHECK-NEXT:    [[NMASK:%.*]] = sext i8 [[M_IN]] to i16
 ; CHECK-NEXT:    [[NMASK_SHR:%.*]] = ashr i16 [[NMASK]], [[Z:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = xor i16 [[NMASK_SHR]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[X]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i16 %x_in, 123

>From ecf16c7303178be4f33a9eeb519743902fff708c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:55 -0500
Subject: [PATCH 06/13] [InstCombine] Recognize `(icmp eq/ne (and X, ~Mask),
 0)` pattern in `foldICmpWithLowBitMaskedVal`

`(icmp eq/ne (and X, ~Mask), 0)` is equivalent to `(icmp eq/ne (and X,
Mask), X)`, and we sometimes generate the former pattern intentionally
to reduce the number of uses of `X`.
Proof: https://alive2.llvm.org/ce/z/3u-usC

Differential Revision: https://reviews.llvm.org/D159329
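
A minimal sketch of the equivalence (illustrative i8 values; the proof
link above covers the general case):

    %notmask = xor i8 %mask, -1     ; ~Mask
    %and = and i8 %x, %notmask
    %r = icmp eq i8 %and, 0         ; (X & ~Mask) == 0

    ; is the same predicate as:

    %and2 = and i8 %x, %mask
    %r2 = icmp eq i8 %and2, %x      ; (X & Mask) == X

so the ~Mask form can reuse the existing low-bit-mask folds.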
---
 .../InstCombine/InstCombineCompares.cpp       | 11 ++++++++
 .../InstCombine/icmp-and-lowbit-mask.ll       | 27 +++++++------------
 .../InstCombine/lshr-and-negC-icmpeq-zero.ll  |  9 +++----
 .../lshr-and-signbit-icmpeq-zero.ll           |  9 +++----
 .../InstCombine/shl-and-negC-icmpeq-zero.ll   |  9 +++----
 .../shl-and-signbit-icmpeq-zero.ll            |  9 +++----
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 30e0e3e33e7ec6b..74279d47ac913fd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4162,6 +4162,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
 /// a check for a lossy truncation.
 /// Folds:
 ///   icmp SrcPred (x & Mask), x    to    icmp DstPred x, Mask
+///   icmp eq/ne (x & ~Mask), 0     to    icmp DstPred x, Mask
 /// Where Mask is some pattern that produces all-ones in low bits:
 ///    (-1 >> y)
 ///    ((-1 << y) >> y)     <- non-canonical, has extra uses
@@ -4194,6 +4195,16 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, const SimplifyQuery &Q,
         SrcPred = ICmpInst::getSwappedPredicate(Pred);
       return CheckMask(M, /*Not*/ false);
     }
+    if (OpNo == 1 && match(I.getOperand(1), m_Zero()) &&
+        ICmpInst::isEquality(Pred) &&
+        match(I.getOperand(0), m_OneUse(m_And(m_Value(X), m_Value(M))))) {
+      NeedsNot = true;
+      if (IC.isFreeToInvert(X, X->hasOneUse()) && CheckMask(X, /*Not*/ true)) {
+        std::swap(X, M);
+        return true;
+      }
+      return IC.isFreeToInvert(M, M->hasOneUse()) && CheckMask(M, /*Not*/ true);
+    }
     return false;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index c1990c3e061154c..5578b9a66e1f219 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -50,9 +50,7 @@ define i1 @src_is_mask_sext(i16 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 31, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i16 [[MASK]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], 0
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i16 %x_in, 123
@@ -220,9 +218,7 @@ define i1 @src_is_mask_shl_lshr(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-LABEL: @src_is_mask_shl_lshr(
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -1, [[Y:%.*]]
-; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[TMP1]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -449,10 +445,8 @@ define i1 @src_is_notmask_shl(i8 %x_in, i8 %y, i1 %cond) {
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 122
 ; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT:    [[NOTMASK0:%.*]] = xor i8 [[MASK]], -1
-; CHECK-NEXT:    [[NOTMASK:%.*]] = select i1 [[COND:%.*]], i8 [[NOTMASK0]], i8 -8
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 [[MASK]], i8 7
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -550,9 +544,7 @@ define i1 @src_is_notmask_neg_p2(i8 %x_in, i8 %y) {
 ; CHECK-NEXT:    [[P2:%.*]] = and i8 [[NY]], [[Y]]
 ; CHECK-NEXT:    [[NMASK:%.*]] = sub i8 0, [[P2]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[MASK]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -569,11 +561,10 @@ define i1 @src_is_notmask_neg_p2(i8 %x_in, i8 %y) {
 define i1 @src_is_notmask_neg_p2_fail_not_invertable(i8 %x_in, i8 %y) {
 ; CHECK-LABEL: @src_is_notmask_neg_p2_fail_not_invertable(
 ; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
-; CHECK-NEXT:    [[P2:%.*]] = and i8 [[NY]], [[Y]]
-; CHECK-NEXT:    [[NOTMASK:%.*]] = sub i8 0, [[P2]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[X]], [[TMP3]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
index 79aef3a5406cfb0..847a7940bad8c74 100644
--- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
@@ -84,8 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_undef1(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -8, i32 undef, i32 -8, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -97,8 +96,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_undef2(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -8, i32 -8, i32 -8, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -110,8 +108,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_undef3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_undef3(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -8, i32 -8, i32 undef, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
index 5335a4736896d7d..39f4e58b25dc849 100644
--- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
@@ -84,8 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef1(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -2147483648, i32 undef, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -97,8 +96,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef2(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 undef, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -110,8 +108,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef3(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], <i32 -2147483648, i32 undef, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
index d8e7fe2e2a2c163..406dc72f2646e5b 100644
--- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
@@ -84,8 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_undef1(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -8, i32 undef, i32 -8, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -97,8 +96,7 @@ define <4 x i1> @vec_shl_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_undef2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -8, i32 -8, i32 -8, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -110,8 +108,7 @@ define <4 x i1> @vec_shl_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_undef3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_undef3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -8, i32 -8, i32 undef, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
index 42b755f51a97137..4c2c876e3925bf9 100644
--- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
@@ -84,8 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef1(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -2147483648, i32 undef, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -97,8 +96,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y)
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 undef, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -110,8 +108,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y)
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], <i32 -2147483648, i32 undef, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], <i32 0, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y

>From 582d6de7e0a9f9b6872f594eb99207bc3aea2787 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:58 -0500
Subject: [PATCH 07/13] [InstCombine] Make the `(icmp eq/ne (and X, Y), X)`
 canonicalization work for non-const operands

We currently do:
    `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (and ~X, Y), 0)`
if `X` is constant. We can make this more general and do it whenever `X` is
freely invertible (i.e., say `X = ~Z`).

Similarly, we can do:
    `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (or X, ~Y), -1)`
if `Y` is freely invertible.

Proofs: https://alive2.llvm.org/ce/z/yeWH3E

Differential Revision: https://reviews.llvm.org/D159059
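
As a quick cross-check of the alive2 proofs, the following standalone C++
harness (illustrative only, not part of the patch) exhaustively verifies both
rewrites over i8:

  // Exhaustively check, for all 8-bit X and Y:
  //   ((X & Y) == Y) <=> ((~X & Y) == 0)     -- form used when X is invertible
  //   ((X & Y) == Y) <=> ((X | ~Y) == -1)    -- form used when Y is invertible
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = (uint8_t)x, Y = (uint8_t)y;
        bool Orig = (uint8_t)(X & Y) == Y;
        bool AndForm = (uint8_t)(~X & Y) == 0;
        bool OrForm = (uint8_t)(X | (uint8_t)~Y) == 0xFF;
        assert(Orig == AndForm && Orig == OrForm);
      }
    }
    std::puts("both rewrites hold for all i8 pairs");
    return 0;
  }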
---
 .../InstCombine/InstCombineCompares.cpp       | 31 ++++++++++---------
 ...low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll | 11 +++----
 ...low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll | 11 +++----
 ...low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll | 12 +++----
 ...low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll | 12 +++----
 .../InstCombine/icmp-and-lowbit-mask.ll       | 30 +++++++++---------
 .../Transforms/InstCombine/icmp-of-and-x.ll   | 22 ++++++-------
 7 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 74279d47ac913fd..27f236a4c4b39d6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4662,6 +4662,22 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
   if (Pred == ICmpInst::ICMP_UGE)
     return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
 
+  if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
+    // icmp (X & Y) eq/ne Y --> (X | ~Y) eq/ne -1 if Y is freely invertible
+    // and Y is non-constant. If Y is constant, the existing form is
+    // preferable (and we canonicalize to it elsewhere).
+    if (!match(Op1, m_ImmConstant()) &&
+        IC.isFreeToInvert(Op1, Op1->hasOneUse() || Op1->hasNUses(2)))
+      return new ICmpInst(Pred,
+                          IC.Builder.CreateOr(A, IC.Builder.CreateNot(Op1)),
+                          Constant::getAllOnesValue(Op1->getType()));
+    // icmp (X & Y) eq/ne Y --> (~X & Y) eq/ne 0 if X is freely invertible.
+    if (IC.isFreeToInvert(A, A->hasOneUse()))
+      return new ICmpInst(Pred,
+                          IC.Builder.CreateAnd(Op1, IC.Builder.CreateNot(A)),
+                          Constant::getNullValue(Op1->getType()));
+  }
+
   return nullptr;
 }
 
@@ -5446,21 +5462,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
     }
   }
 
-  // canoncalize:
-  // (icmp eq/ne (and X, C), X)
-  //    -> (icmp eq/ne (and X, ~C), 0)
-  {
-    Constant *CMask;
-    A = nullptr;
-    if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask)))))
-      A = Op1;
-    else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask)))))
-      A = Op0;
-    if (A)
-      return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)),
-                          Constant::getNullValue(A->getType()));
-  }
-
   if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
     // A == (A^B)  ->  B == 0
     Value *OtherVal = A == Op0 ? B : A;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
index 624b9baa372810a..af1c377d63b84c8 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
@@ -269,9 +269,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) {
 define i1 @n1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n1(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 1, %y ; not -1
@@ -284,9 +283,9 @@ define i1 @n1(i8 %x, i8 %y) {
 define i1 @n2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[T0]], -2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 -1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
index 4e1b90a476a2601..68188b1360bc27d 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
@@ -269,9 +269,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) {
 define i1 @n1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n1(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 1, %y ; not -1
@@ -284,9 +283,9 @@ define i1 @n1(i8 %x, i8 %y) {
 define i1 @n2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[T0]], -2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 -1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
index 75de1dc6d07a222..8ae4d755f977f56 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
@@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n1(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[T0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 -1, %y ; not 1
@@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add nuw i8 [[T0]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw i8 -2, [[T0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
index a92f4a9d9543112..622a78419a2ffb0 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
@@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n1(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[T0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 -1, %y ; not 1
@@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @n2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add nuw i8 [[T0]], 1
-; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw i8 -2, [[T0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %t0 = shl i8 1, %y
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 5578b9a66e1f219..43f6f36134ce338 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -29,11 +29,11 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
 
 define i1 @src_is_mask_zext_fail_not_mask(i16 %x_in, i8 %y) {
 ; CHECK-LABEL: @src_is_mask_zext_fail_not_mask(
-; CHECK-NEXT:    [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[M_IN:%.*]] = lshr i8 -2, [[Y:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i16 %x_in, 123
@@ -106,12 +106,12 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
 
 define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) {
 ; CHECK-LABEL: @src_is_mask_and_fail_mixed(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[MY:%.*]] = ashr i8 -8, [[Y:%.*]]
 ; CHECK-NEXT:    [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -159,11 +159,11 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
 
 define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) {
 ; CHECK-LABEL: @src_is_mask_xor_fail_notmask(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[Y_M1_NOT:%.*]] = sub i8 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[NOTMASK:%.*]] = xor i8 [[Y_M1_NOT]], [[Y]]
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[NOTMASK]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -349,12 +349,12 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
 
 define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) {
 ; CHECK-LABEL: @src_is_mask_umin_fail_mismatch(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
 ; CHECK-NEXT:    [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 -32)
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -500,12 +500,12 @@ define i1 @src_is_notmask_lshr_shl(i8 %x_in, i8 %y) {
 
 define i1 @src_is_notmask_lshr_shl_fail_mismatch_shifts(i8 %x_in, i8 %y, i8 %z) {
 ; CHECK-LABEL: @src_is_notmask_lshr_shl_fail_mismatch_shifts(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
 ; CHECK-NEXT:    [[MASK_SHR:%.*]] = lshr i8 -1, [[Y:%.*]]
 ; CHECK-NEXT:    [[NMASK:%.*]] = shl i8 [[MASK_SHR]], [[Z:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = xor i8 [[NMASK]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[TMP1]], [[MASK]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index e95c72b75f97dfd..a83572bff3251ae 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -238,9 +238,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) {
 
 define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
 ; CHECK-LABEL: @icmp_eq_x_invertable_y_todo(
-; CHECK-NEXT:    [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %yy = select i1 %y, i8 7, i8 24
@@ -251,9 +251,8 @@ define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
 
 define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
 ; CHECK-LABEL: @icmp_eq_x_invertable_y(
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %yy = xor i8 %y, -1
@@ -264,9 +263,9 @@ define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
 
 define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) {
 ; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
-; CHECK-NEXT:    [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[YY]], [[AND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %yy = select i1 %y, i8 7, i8 24
@@ -277,9 +276,8 @@ define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) {
 
 define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @icmp_eq_x_invertable_y2(
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND]], [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %yy = xor i8 %y, -1

>From c1e31c16863951ed2977c3ab1519f6d16f02ca39 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:46:00 -0500
Subject: [PATCH 08/13] [InstCombine] Make `getKnownSign` a member function of
 InstCombiner; NFC

This is prep for using `getKnownSign` outside of just InstCombineCalls.

Differential Revision: https://reviews.llvm.org/D159060
---
 .../InstCombine/InstCombineCalls.cpp          | 29 ++++++++-----------
 .../InstCombine/InstCombineInternal.h         |  7 +++++
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e29fb869686ca0b..9930421ec53bca4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1016,10 +1016,9 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
   return nullptr;
 }
 
-static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
-                                   const DataLayout &DL, AssumptionCache *AC,
-                                   DominatorTree *DT) {
-  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
+std::optional<bool> InstCombinerImpl::getKnownSign(Value *Op,
+                                                   Instruction *CxtI) const {
+  KnownBits Known = llvm::computeKnownBits(Op, DL, /*Depth*/ 0, &AC, CxtI, &DT);
   if (Known.isNonNegative())
     return false;
   if (Known.isNegative())
@@ -1033,11 +1032,9 @@ static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
       ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
 }
 
-static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI,
-                                              const DataLayout &DL,
-                                              AssumptionCache *AC,
-                                              DominatorTree *DT) {
-  if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
+std::optional<bool>
+InstCombinerImpl::getKnownSignOrZero(Value *Op, Instruction *CxtI) const {
+  if (std::optional<bool> Sign = getKnownSign(Op, CxtI))
     return Sign;
 
   Value *X, *Y;
@@ -1049,12 +1046,11 @@ static std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI,
 
 /// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
 static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
-                                 const DataLayout &DL, AssumptionCache *AC,
-                                 DominatorTree *DT) {
-  std::optional<bool> Known1 = getKnownSign(Op1, CxtI, DL, AC, DT);
+                                 InstCombinerImpl &IC) {
+  std::optional<bool> Known1 = IC.getKnownSign(Op1, CxtI);
   if (!Known1)
     return false;
-  std::optional<bool> Known0 = getKnownSign(Op0, CxtI, DL, AC, DT);
+  std::optional<bool> Known0 = IC.getKnownSign(Op0, CxtI);
   if (!Known0)
     return false;
   return *Known0 == *Known1;
@@ -1547,8 +1543,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
       return replaceOperand(*II, 0, X);
 
-    if (std::optional<bool> Known =
-            getKnownSignOrZero(IIOperand, II, DL, &AC, &DT)) {
+    if (std::optional<bool> Known = getKnownSignOrZero(IIOperand, II)) {
       // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
       // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
       if (!*Known)
@@ -1673,7 +1668,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
 
       if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
-        auto KnownSign = getKnownSign(X, II, DL, &AC, &DT);
+        auto KnownSign = getKnownSign(X, II);
         if (KnownSign == std::nullopt) {
           UseOr = false;
           UseAndN = false;
@@ -2446,7 +2441,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
 
       if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
-          signBitMustBeTheSame(Exp, InnerExp, II, DL, &AC, &DT)) {
+          signBitMustBeTheSame(Exp, InnerExp, II, *this)) {
         // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
         // width.
         Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index a53d67b2899b700..c60d0754ec3b219 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -470,6 +470,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
       Instruction::BinaryOps BinaryOp, bool IsSigned,
       Value *LHS, Value *RHS, Instruction *CxtI) const;
 
+  // Return true if known negative, false if known non-negative, and nullopt
+  // if unknown.
+  std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI) const;
+  // Return true if known negative or zero, false if known non-negative or
+  // zero, and nullopt if unknown.
+  std::optional<bool> getKnownSignOrZero(Value *Op, Instruction *CxtI) const;
+
   /// Performs a few simplifications for operators which are associative
   /// or commutative.
   bool SimplifyAssociativeOrCommutative(BinaryOperator &I);

>From 9f8857a426ad6d4c13dd945163540379477e9bc2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:46:03 -0500
Subject: [PATCH 09/13] [InstCombine] Add transforms for `(icmp spred (and X,
 Y), X)` if `X` or `Y` are known signed/unsigned

Several transforms:
    1) If known `Y < 0`:
        - slt -> ult: https://alive2.llvm.org/ce/z/unpsxu
        - sle -> ule: https://alive2.llvm.org/ce/z/a5cpmr
        - sgt -> ugt: https://alive2.llvm.org/ce/z/qgsgya
        - sge -> uge: https://alive2.llvm.org/ce/z/ek_3ls
    2) If known `Y > 0`:
        - `(X & PosY) s> X --> X s< 0`
            - https://alive2.llvm.org/ce/z/drewij
        - `(X & PosY) s<= X --> X s>= 0`
            - https://alive2.llvm.org/ce/z/9arL3D
    3) If known `X < 0`:
        - `(NegX & Y) s> NegX --> Y s>= 0`
            - https://alive2.llvm.org/ce/z/ApkaEh
        - `(NegX & Y) s<= NegX --> Y s< 0`
            - https://alive2.llvm.org/ce/z/oRnfHp

Differential Revision: https://reviews.llvm.org/D145426
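
To complement the alive2 links, here is a standalone brute-force check
(illustrative only, not part of the patch) that verifies each rewrite
exhaustively over i8, with `And = X & Y` compared against `X`:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    for (int x = -128; x < 128; ++x) {
      for (int y = -128; y < 128; ++y) {
        int8_t X = (int8_t)x, Y = (int8_t)y;
        int8_t And = (int8_t)(X & Y);
        // 1) Y known negative: the signed predicate becomes unsigned.
        //    (sgt/sge follow from slt/sle by negating the result.)
        if (Y < 0) {
          assert((And < X) == ((uint8_t)And < (uint8_t)X));   // slt -> ult
          assert((And <= X) == ((uint8_t)And <= (uint8_t)X)); // sle -> ule
        }
        // 2) Y known non-negative.
        if (Y >= 0) {
          assert((And > X) == (X < 0));   // (X & PosY) s> X  --> X s< 0
          assert((And <= X) == (X >= 0)); // (X & PosY) s<= X --> X s>= 0
        }
        // 3) X known negative.
        if (X < 0) {
          assert((And > X) == (Y >= 0));  // (NegX & Y) s> NegX  --> Y s>= 0
          assert((And <= X) == (Y < 0));  // (NegX & Y) s<= NegX --> Y s< 0
        }
      }
    }
    std::puts("all signed-predicate rewrites hold for i8");
    return 0;
  }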
---
 .../InstCombine/InstCombineCompares.cpp       | 45 ++++++++++++++++++-
 ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll |  3 +-
 ...t-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll |  3 +-
 ...t-low-bit-mask-and-icmp-sle-to-icmp-sle.ll |  3 +-
 ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll |  3 +-
 .../InstCombine/icmp-and-lowbit-mask.ll       |  8 +---
 .../Transforms/InstCombine/icmp-of-and-x.ll   | 28 ++++--------
 7 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 27f236a4c4b39d6..e2cdf0cd5c4349b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4641,7 +4641,7 @@ static Instruction *foldICmpXNegX(ICmpInst &I,
   return nullptr;
 }
 
-static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
+static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &,
                                   InstCombinerImpl &IC) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
   // Normalize and operand as operand 0.
@@ -4678,6 +4678,49 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
                           Constant::getNullValue(Op1->getType()));
   }
 
+  if (ICmpInst::isEquality(Pred) || ICmpInst::getSignedPredicate(Pred) != Pred)
+    return nullptr;
+
+  auto IsTrue = [](std::optional<bool> OptBool) {
+    return OptBool.has_value() && OptBool.value();
+  };
+
+  auto IsFalse = [](std::optional<bool> OptBool) {
+    return OptBool.has_value() && !OptBool.value();
+  };
+
+  auto KnownSignY = IC.getKnownSign(A, &I);
+
+  // (X & NegY) spred X --> (X & NegY) upred X
+  if (IsTrue(KnownSignY))
+    return new ICmpInst(ICmpInst::getUnsignedPredicate(Pred), Op0, Op1);
+
+  if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGT)
+    return nullptr;
+
+  if (IsFalse(KnownSignY)) {
+    // (X & PosY) s<= X --> X s>= 0
+    if (Pred == ICmpInst::ICMP_SLE)
+      return new ICmpInst(ICmpInst::ICMP_SGE, Op1,
+                          Constant::getNullValue(Op1->getType()));
+    // (X & PosY) s> X --> X s< 0
+    if (Pred == ICmpInst::ICMP_SGT)
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op1,
+                          Constant::getNullValue(Op1->getType()));
+  }
+
+  if (IsTrue(IC.getKnownSign(Op1, &I))) {
+    // (NegX & Y) s> NegX --> Y s>= 0
+    if (Pred == ICmpInst::ICMP_SGT)
+      return new ICmpInst(ICmpInst::ICMP_SGE, A,
+                          Constant::getNullValue(A->getType()));
+
+    // (NegX & Y) s<= NegX --> Y s< 0
+    if (Pred == ICmpInst::ICMP_SLE)
+      return new ICmpInst(ICmpInst::ICMP_SLT, A,
+                          Constant::getNullValue(A->getType()));
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
index 60921042d524356..0380c052d5d025d 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -98,8 +98,7 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[X]], -1
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
index 6345e70d7220e29..4a8b4d4ba595cd7 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -125,8 +125,7 @@ define i1 @oneuse0() {
 
 define i1 @c0(i8 %x) {
 ; CHECK-LABEL: @c0(
-; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
index b7aec53fed6760f..8d00f445243a402 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -113,8 +113,7 @@ define i1 @oneuse0() {
 
 define i1 @c0(i8 %x) {
 ; CHECK-LABEL: @c0(
-; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[X:%.*]], -1
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %tmp0 = and i8 %x, 3
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
index 56661d335c4f602..8130029a3dd58e1 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -108,8 +108,7 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X]], 0
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
   %x = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 43f6f36134ce338..a134a3a5ddc8b9c 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -578,9 +578,7 @@ define i1 @src_is_notmask_neg_p2_fail_not_invertable(i8 %x_in, i8 %y) {
 
 define i1 @src_is_mask_const_slt(i8 %x_in) {
 ; CHECK-LABEL: @src_is_mask_const_slt(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 7
-; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[X]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[X_IN:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
@@ -603,9 +601,7 @@ define i1 @src_is_mask_const_sgt(i8 %x_in) {
 
 define i1 @src_is_mask_const_sle(i8 %x_in) {
 ; CHECK-LABEL: @src_is_mask_const_sle(
-; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 31
-; CHECK-NEXT:    [[R:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt i8 [[X_IN:%.*]], -1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index a83572bff3251ae..bfb4abc80ae379b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -57,7 +57,7 @@ define i1 @icmp_sge_x_negy(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[CY:%.*]] = icmp slt i8 [[Y:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CY]])
 ; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sge i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp eq i8 [[AND]], [[X]]
 ; CHECK-NEXT:    ret i1 [[Z]]
 ;
   %cy = icmp slt i8 %y, 0
@@ -73,7 +73,7 @@ define i1 @icmp_slt_x_negy(i8 %x, i8 %y) {
 ; CHECK-NEXT:    br i1 [[CY]], label [[NEGY:%.*]], label [[POSY:%.*]]
 ; CHECK:       negy:
 ; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp slt i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp ne i8 [[AND]], [[X]]
 ; CHECK-NEXT:    ret i1 [[Z]]
 ; CHECK:       posy:
 ; CHECK-NEXT:    [[R:%.*]] = call i1 @barrier()
@@ -115,10 +115,7 @@ posy:
 
 define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
 ; CHECK-LABEL: @icmp_sle_x_negy(
-; CHECK-NEXT:    [[Y:%.*]] = or i8 [[YY:%.*]], -128
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
-; CHECK-NEXT:    ret i1 [[Z]]
+; CHECK-NEXT:    ret i1 true
 ;
   %y = or i8 %yy, 128
   %and = and i8 %y, %x
@@ -128,10 +125,7 @@ define i1 @icmp_sle_x_negy(i8 %x, i8 %yy) {
 
 define <2 x i1> @icmp_sgt_x_negy(<2 x i8> %x, <2 x i8> %yy) {
 ; CHECK-LABEL: @icmp_sgt_x_negy(
-; CHECK-NEXT:    [[Y:%.*]] = or <2 x i8> [[YY:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
-; CHECK-NEXT:    ret <2 x i1> [[Z]]
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %y = or <2 x i8> %yy, <i8 128, i8 128>
   %and = and <2 x i8> %y, %x
@@ -154,9 +148,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) {
 
 define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) {
 ; CHECK-LABEL: @icmp_sle_x_posy(
-; CHECK-NEXT:    [[Y:%.*]] = and <2 x i8> [[YY:%.*]], <i8 127, i8 127>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sle <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = and <2 x i8> %yy, <i8 127, i8 127>
@@ -182,8 +174,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
 ; CHECK-LABEL: @icmp_sgt_x_posy(
 ; CHECK-NEXT:    [[CY:%.*]] = icmp sgt i8 [[Y:%.*]], -1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CY]])
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt i8 [[X:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[Z]]
 ;
   %cy = icmp sge i8 %y, 0
@@ -195,9 +186,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
 
 define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_sgt_negx_y(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x = or <2 x i8> %xx, <i8 128, i8 128>
@@ -210,8 +199,7 @@ define i1 @icmp_sle_negx_y(i8 %x, i8 %y) {
 ; CHECK-LABEL: @icmp_sle_negx_y(
 ; CHECK-NEXT:    [[CX:%.*]] = icmp slt i8 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CX]])
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp sle i8 [[AND]], [[X]]
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt i8 [[Y:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[Z]]
 ;
   %cx = icmp slt i8 %x, 0

>From 302e32b3a906e8ae0a94a72c1fd3bdb9e90c2b05 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:46:06 -0500
Subject: [PATCH 10/13] [InstCombine] Add tests for folding multiuse `(icmp
 eq/ne (or X, Y), Y)`; NFC

Differential Revision: https://reviews.llvm.org/D159061
---
 .../Transforms/InstCombine/icmp-of-or-x.ll    | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 4b8df439b846fb6..b8773057d18fc17 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -377,3 +377,30 @@ define i1 @pr64610(ptr %b) {
   %r = icmp ugt i32 %or, %s
   ret i1 %r
 }
+
+define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y, i8 %z) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
+; CHECK-NEXT:    [[ZZ:%.*]] = xor i8 [[Z:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 [[ZZ]]
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[YY]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[YY]], [[OR]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %zz = xor i8 %z, -1
+  %yy = select i1 %y, i8 7, i8 %zz
+  %or = or i8 %x, %yy
+  %r = icmp eq i8 %yy, %or
+  ret i1 %r
+}
+
+define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_x_invertable_y2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %yy = xor i8 %y, -1
+  %or = or i8 %x, %yy
+  %r = icmp eq i8 %yy, %or
+  ret i1 %r
+}

>From 9211164eb9c80cb6813aa3ea4a5b609bc375e0a3 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:46:08 -0500
Subject: [PATCH 11/13] [InstCombine] Folding multiuse `(icmp eq/ne (or X, Y),
 Y)` for 2 uses of `Y`

The fold will replace 2 uses of `Y`, so we should also perform the fold if
`Y` has exactly 2 uses (not only one use).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D159062
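
The identity behind the fold is unchanged -- only the use-count limit moves
from one use of `Y` to two, since the rewrite consumes both the `or` and the
`icmp` use of `Y`. A standalone i8 brute-force check of the identity
(illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = (uint8_t)x, Y = (uint8_t)y;
        // ((X | Y) == Y) <=> ((X & ~Y) == 0): X adds no bits outside Y.
        assert(((uint8_t)(X | Y) == Y) ==
               ((uint8_t)(X & (uint8_t)~Y) == 0));
      }
    }
    std::puts("(X | Y) == Y  <=>  (X & ~Y) == 0 holds for all i8 pairs");
    return 0;
  }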
---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +-
 llvm/test/Transforms/InstCombine/icmp-of-or-x.ll        | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e2cdf0cd5c4349b..47dcb653649e211 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4747,7 +4747,7 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
 
   if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
     // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
-    if (IC.isFreeToInvert(Op1, Op1->hasOneUse()))
+    if (IC.isFreeToInvert(Op1, !Op1->hasNUsesOrMore(3)))
       return new ICmpInst(Pred,
                           IC.Builder.CreateAnd(A, IC.Builder.CreateNot(Op1)),
                           Constant::getNullValue(Op1->getType()));
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index b8773057d18fc17..050636e3332e9cc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -380,10 +380,9 @@ define i1 @pr64610(ptr %b) {
 
 define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y, i8 %z) {
 ; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
-; CHECK-NEXT:    [[ZZ:%.*]] = xor i8 [[Z:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 [[ZZ]]
-; CHECK-NEXT:    [[OR:%.*]] = or i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[YY]], [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %zz = xor i8 %z, -1

>From 17d51d0323202d10eae38bf866bd90650141c87b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:46:11 -0500
Subject: [PATCH 12/13] [InstCombine] Add tests for transforming `(or/and (icmp
 eq/ne X,0),(icmp eq/ne X,Pow2OrZero))`; NFC

Differential Revision: https://reviews.llvm.org/D157311
---
 .../Transforms/InstCombine/and-or-icmps.ll    | 145 ++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index 881a9b7ff129dbb..3b43b008367e788 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -3044,3 +3044,148 @@ define i32 @icmp_slt_0_or_icmp_add_1_sge_100_i32_fail(i32 %x) {
   %D = or i32 %C, %B
   ret i32 %D
 }
+
+define i1 @icmp_eq_or_z_or_pow2orz(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz(
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ny = sub i8 0, %y
+  %pow2orz = and i8 %ny, %y
+
+  %c0 = icmp eq i8 %x, 0
+  %cp2 = icmp eq i8 %x, %pow2orz
+  %r = or i1 %c0, %cp2
+  ret i1 %r
+}
+
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_logic_or(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_logic_or(
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[C0]], i1 true, i1 [[CP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ny = sub i8 0, %y
+  %pow2orz = and i8 %ny, %y
+
+  %c0 = icmp eq i8 %x, 0
+  %cp2 = icmp eq i8 %x, %pow2orz
+  %r = select i1 %c0, i1 true, i1 %cp2
+  ret i1 %r
+}
+
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz(
+; CHECK-NEXT:    [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[CP2:%.*]] = icmp ne <2 x i8> [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[C0]], [[CP2]]
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %ny = sub <2 x i8> zeroinitializer, %y
+  %pow2orz = and <2 x i8> %ny, %y
+
+  %c0 = icmp ne <2 x i8> %x, zeroinitializer
+  %cp2 = icmp ne <2 x i8> %x, %pow2orz
+  %r = and <2 x i1> %c0, %cp2
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz_fail_logic_and(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz_fail_logic_and(
+; CHECK-NEXT:    [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[CP2:%.*]] = icmp ne <2 x i8> [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[C0]], <2 x i1> [[CP2]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %ny = sub <2 x i8> zeroinitializer, %y
+  %pow2orz = and <2 x i8> %ny, %y
+
+  %c0 = icmp ne <2 x i8> %x, zeroinitializer
+  %cp2 = icmp ne <2 x i8> %x, %pow2orz
+  %r = select <2 x i1> %c0, <2 x i1> %cp2, <2 x i1> zeroinitializer
+  ret <2 x i1> %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_not_pow2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_not_pow2(
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 1, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ny = sub i8 1, %y
+  %pow2orz = and i8 %ny, %y
+
+  %c0 = icmp eq i8 %x, 0
+  %cp2 = icmp eq i8 %x, %pow2orz
+  %r = or i1 %c0, %cp2
+  ret i1 %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_nonzero_const(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_nonzero_const(
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp eq i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ny = sub i8 0, %y
+  %pow2orz = and i8 %ny, %y
+
+  %c0 = icmp eq i8 %x, 1
+  %cp2 = icmp eq i8 %x, %pow2orz
+  %r = or i1 %c0, %cp2
+  ret i1 %r
+}
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz_fail_bad_pred(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz_fail_bad_pred(
+; CHECK-NEXT:    [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %ny = sub <2 x i8> zeroinitializer, %y
+  %pow2orz = and <2 x i8> %ny, %y
+
+  %c0 = icmp eq <2 x i8> %x, zeroinitializer
+  %cp2 = icmp eq <2 x i8> %x, %pow2orz
+  %r = and <2 x i1> %c0, %cp2
+  ret <2 x i1> %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_bad_pred2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_bad_pred2(
+; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp slt i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[CP2:%.*]] = icmp sge i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ny = sub i8 0, %y
+  %pow2orz = and i8 %ny, %y
+
+  %c0 = icmp sle i8 %x, 0
+  %cp2 = icmp sle i8 %x, %pow2orz
+  %r = or i1 %c0, %cp2
+  ret i1 %r
+}

>From 98aa59b063469556d8d98195f123dcb9d7e31245 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 15 Sep 2023 12:47:54 -0500
Subject: [PATCH 13/13] [InstCombine] Add transforms for `(or/and (icmp eq/ne
 X,0),(icmp eq/ne X,Pow2OrZero))`

`(or (icmp eq X, 0), (icmp eq X, Pow2OrZero))`
    --> `(icmp eq (and X, Pow2OrZero), X)`

`(and (icmp ne X, 0), (icmp ne X, Pow2OrZero))`
    --> `(icmp ne (and X, Pow2OrZero), X)`

Proofs: https://alive2.llvm.org/ce/z/nPo2BN

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D157312
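
As in the new tests, a power-of-two-or-zero value can be produced as
`(-Y) & Y` (the lowest set bit of `Y`, or zero). The following standalone
check (illustrative only, not part of the patch) verifies the fold
exhaustively over i8:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = (uint8_t)x, Y = (uint8_t)y;
        uint8_t P = (uint8_t)(-(int)Y & (int)Y); // pow2-or-zero mask
        // (X == 0 || X == P) <=> ((X & P) == X): X's bits lie within P's.
        bool OrOfCmps = (X == 0) || (X == P);
        bool Single = (uint8_t)(X & P) == X;
        assert(OrOfCmps == Single);
      }
    }
    std::puts("pow2-or-zero fold holds for all i8 pairs");
    return 0;
  }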
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 37 +++++++++++++++++++
 .../Transforms/InstCombine/and-or-icmps.ll    | 10 ++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5a3d6dd20d60a08..b080cbfed17401d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -739,6 +739,38 @@ Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
   return Builder.CreateICmp(NewPred, Input, RangeEnd);
 }
 
+// (or (icmp eq X, 0), (icmp eq X, Pow2OrZero))
+//      -> (icmp eq (and X, Pow2OrZero), X)
+// (and (icmp ne X, 0), (icmp ne X, Pow2OrZero))
+//      -> (icmp ne (and X, Pow2OrZero), X)
+static Value *
+foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder,
+                                    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+                                    const SimplifyQuery &Q) {
+  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+  // Make sure we have the right compares for our op.
+  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
+    return nullptr;
+
+  // Canonicalize so that LHS is the (icmp eq/ne X, 0) compare, purely for
+  // simplicity.
+  if (match(RHS->getOperand(1), m_Zero()))
+    std::swap(LHS, RHS);
+
+  Value *Pow2, *Op;
+  // Match the desired pattern:
+  // LHS: (icmp eq/ne X, 0)
+  // RHS: (icmp eq/ne X, Pow2OrZero)
+  if (!match(LHS, m_ICmp(Pred, m_Value(Op), m_Zero())) ||
+      !match(RHS, m_c_ICmp(Pred, m_Specific(Op), m_Value(Pow2))) ||
+      !isKnownToBeAPowerOfTwo(Pow2, Q.DL, /*OrZero*/ true, /*Depth*/ 0, Q.AC,
+                              Q.CxtI, Q.DT))
+    return nullptr;
+
+  Value *And = Builder.CreateAnd(Op, Pow2);
+  return Builder.CreateICmp(Pred, And, Op);
+}
+
 // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
 // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
 Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
@@ -3102,6 +3134,11 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
   Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
   Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
+  if (!IsLogical)
+    if (Value *V =
+            foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q))
+      return V;
+
   const APInt *LHSC = nullptr, *RHSC = nullptr;
   match(LHS1, m_APInt(LHSC));
   match(RHS1, m_APInt(RHSC));
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index 3b43b008367e788..eb91bd7cbf13d7b 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -3049,9 +3049,8 @@ define i1 @icmp_eq_or_z_or_pow2orz(i8 %x, i8 %y) {
 ; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz(
 ; CHECK-NEXT:    [[NY:%.*]] = sub i8 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
-; CHECK-NEXT:    [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
-; CHECK-NEXT:    [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ny = sub i8 0, %y
@@ -3087,9 +3086,8 @@ define <2 x i1> @icmp_ne_and_z_and_pow2orz(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz(
 ; CHECK-NEXT:    [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
 ; CHECK-NEXT:    [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
-; CHECK-NEXT:    [[C0:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[CP2:%.*]] = icmp ne <2 x i8> [[POW2ORZ]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[C0]], [[CP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %ny = sub <2 x i8> zeroinitializer, %y
