[llvm] [InstCombine] Improve handling of `not` and free inversion. (PR #66787)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 19 13:22:03 PST 2023


https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/66787

From 9ca108f2a734480afb30ab24c1c46babb1ceccb1 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sun, 19 Nov 2023 15:02:10 -0600
Subject: [PATCH 1/7] [InstCombine] Add tests for folding `(X + Y) - (W + Z)`;
 NFC

---
 llvm/test/Transforms/InstCombine/sub.ll | 46 +++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 754b532de20083d..08748d263ba1f78 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2580,3 +2580,49 @@ define i8 @diff_of_muls2(i8 %x, i8 %y, i8 %z) {
   %r = sub i8 %x2, %m
   ret i8 %r
 }
+
+define i8 @sub_of_adds_2xz_multiuse(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @sub_of_adds_2xz_multiuse(
+; CHECK-NEXT:    [[XZ:%.*]] = add i8 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[YZ:%.*]] = add i8 [[Z]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[XZ]], [[YZ]]
+; CHECK-NEXT:    call void @use8(i8 [[XZ]])
+; CHECK-NEXT:    call void @use8(i8 [[YZ]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xz = add i8 %x, %z
+  %yz = add i8 %z, %y
+  %r = sub i8 %xz, %yz
+  call void @use8(i8 %xz)
+  call void @use8(i8 %yz)
+  ret i8 %r
+}
+
+define i8 @sub_of_adds_2xc_multiuse2_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_of_adds_2xc_multiuse2_fail(
+; CHECK-NEXT:    [[XC:%.*]] = add i8 [[X:%.*]], 10
+; CHECK-NEXT:    [[YC:%.*]] = add i8 [[Y:%.*]], 11
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[XC]], [[YC]]
+; CHECK-NEXT:    call void @use8(i8 [[XC]])
+; CHECK-NEXT:    call void @use8(i8 [[YC]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xc = add i8 %x, 10
+  %yc = add i8 %y, 11
+  %r = sub i8 %xc, %yc
+  call void @use8(i8 %xc)
+  call void @use8(i8 %yc)
+  ret i8 %r
+}
+
+define i8 @sub_of_adds_2xc(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_of_adds_2xc(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[TMP1]], 2
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xc = add i8 %x, 10
+  %yc = add i8 %y, 8
+  %r = sub i8 %xc, %yc
+  ret i8 %r
+}

From 2410ad53b367977170259ae27b7ec4e484c590f9 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sun, 19 Nov 2023 15:02:39 -0600
Subject: [PATCH 2/7] [InstCombine] Add folds for `(X + Y) - (W + Z)`

If `Y` and `Z` are constants, we can simplify to `(X - W) + (Y - Z)`.
If `Y == Z`, we can fold to `X - W`.

Note these transforms already exist outside of InstCombine (in other
passes). The purpose of this commit is primarily to make it so that
folds can generate these simplifiable patterns without having to worry
about creating an infinite loop.
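
For example (a minimal sketch; %x/%y/%z are placeholder values):

    ; (X + Z) - (Y + Z) --> X - Y
    %xz = add i8 %x, %z
    %yz = add i8 %y, %z
    %r  = sub i8 %xz, %yz      ; --> %r = sub i8 %x, %y

    ; (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)  (both adds one-use)
    %xc = add i8 %x, 10
    %yc = add i8 %y, 8
    %r2 = sub i8 %xc, %yc      ; --> %t  = sub i8 %x, %y
                               ;     %r2 = add i8 %t, 2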
---
 .../InstCombine/InstCombineAddSub.cpp         | 19 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/sub.ll       |  6 +++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 90b1c133461a408..65cafbdbe61cff6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2202,6 +2202,25 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return Sub;
   }
 
+  {
+    // (X + Z) - (Y + Z) --> (X - Y)
+    // This is done in other passes, but we want to be able to consume this
+    // pattern in InstCombine so we can generate it without creating infinite
+    // loops.
+    if (match(Op0, m_Add(m_Value(X), m_Value(Z))) &&
+        match(Op1, m_c_Add(m_Value(Y), m_Specific(Z))))
+      return BinaryOperator::CreateSub(X, Y);
+
+    // (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)
+    Constant *CX, *CY;
+    if (match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(CX)))) &&
+        match(Op1, m_OneUse(m_Add(m_Value(Y), m_ImmConstant(CY))))) {
+      Value *OpsSub = Builder.CreateSub(X, Y);
+      Constant *ConstsSub = ConstantExpr::getSub(CX, CY);
+      return BinaryOperator::CreateAdd(OpsSub, ConstsSub);
+    }
+  }
+
   // (~X) - (~Y) --> Y - X
   // This is placed after the other reassociations and explicitly excludes a
   // sub-of-sub pattern to avoid infinite looping.
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 08748d263ba1f78..1fceca321da124a 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1110,8 +1110,8 @@ define i32 @test57(i32 %A, i32 %B) {
 
 define i64 @test58(ptr %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test58(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
-; CHECK-NEXT:    ret i64 [[TMP1]]
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
 ;
   %gep1 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 42, i64 %i
   %gep2 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 42, i64 %j
@@ -2585,7 +2585,7 @@ define i8 @sub_of_adds_2xz_multiuse(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @sub_of_adds_2xz_multiuse(
 ; CHECK-NEXT:    [[XZ:%.*]] = add i8 [[X:%.*]], [[Z:%.*]]
 ; CHECK-NEXT:    [[YZ:%.*]] = add i8 [[Z]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[XZ]], [[YZ]]
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[X]], [[Y]]
 ; CHECK-NEXT:    call void @use8(i8 [[XZ]])
 ; CHECK-NEXT:    call void @use8(i8 [[YZ]])
 ; CHECK-NEXT:    ret i8 [[R]]

From 2a498b6922d6e44b1a13615427bce35bc330db3c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 13 Sep 2023 13:45:47 -0500
Subject: [PATCH 3/7] [InstCombine] Make `isFreeToInvert` check recursively.

Some instructions (select/min/max) are inverted by simply inverting
their operands, so whether they are free to invert reduces to whether
their operands are free to invert.
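
For example (a minimal sketch), a select was previously only considered
free to invert when both arms were literal `not`s; with the recursive
check, arms that are merely free to invert also qualify:

    %a1 = add i8 %a, 5           ; free to invert: ~(%a + 5) == -6 - %a
    %b1 = sub i8 10, %b          ; free to invert: ~(10 - %b) == %b - 11
    %s  = select i1 %c, i8 %a1, i8 %b1
    ; isFreeToInvert(%s) is now true.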

Differential Revision: https://reviews.llvm.org/D159056
---
 .../Transforms/InstCombine/InstCombiner.h     | 74 ++++++++++++-------
 .../InstCombine/InstCombineAddSub.cpp         | 17 +++--
 .../InstCombine/minmax-intrinsics.ll          | 13 ++--
 3 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index f8b3874267ded3b..3fc16d0e803a6db 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -239,45 +239,67 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
   /// uses of V and only keep uses of ~V.
   ///
   /// See also: canFreelyInvertAllUsersOf()
-  static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
+  static bool isFreeToInvertImpl(Value *V, bool WillInvertAllUses,
+                                 bool &DoesConsume, unsigned Depth) {
+    using namespace llvm::PatternMatch;
     // ~(~(X)) -> X.
-    if (match(V, m_Not(PatternMatch::m_Value())))
+    if (match(V, m_Not(m_Value()))) {
+      DoesConsume = true;
       return true;
+    }
 
     // Constants can be considered to be not'ed values.
-    if (match(V, PatternMatch::m_AnyIntegralConstant()))
+    if (match(V, m_AnyIntegralConstant()))
       return true;
 
+    if (Depth++ >= MaxAnalysisRecursionDepth)
+      return false;
+
+    // The rest of the cases require that we invert all uses so don't bother
+    // doing the analysis if we know we can't use the result.
+    if (!WillInvertAllUses)
+      return false;
+
     // Compares can be inverted if all of their uses are being modified to use
     // the ~V.
     if (isa<CmpInst>(V))
-      return WillInvertAllUses;
-
-    // If `V` is of the form `A + Constant` then `-1 - V` can be folded into
-    // `(-1 - Constant) - A` if we are willing to invert all of the uses.
-    if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant())))
-      return WillInvertAllUses;
-
-    // If `V` is of the form `Constant - A` then `-1 - V` can be folded into
-    // `A + (-1 - Constant)` if we are willing to invert all of the uses.
-    if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value())))
-      return WillInvertAllUses;
-
-    // Selects with invertible operands are freely invertible
-    if (match(V,
-              m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()),
-                       m_Not(PatternMatch::m_Value()))))
-      return WillInvertAllUses;
-
-    // Min/max may be in the form of intrinsics, so handle those identically
-    // to select patterns.
-    if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()),
-                            m_Not(PatternMatch::m_Value()))))
-      return WillInvertAllUses;
+      return true;
+
+    Value *A, *B;
+    // If `V` is of the form `A + B` then `-1 - V` can be folded into
+    // `~B - A` or `~A - B` if we are willing to invert all of the uses.
+    if (match(V, m_Add(m_Value(A), m_Value(B))))
+      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth) ||
+             isFreeToInvertImpl(B, B->hasOneUse(), DoesConsume, Depth);
+
+    // If `V` is of the form `A - B` then `-1 - V` can be folded into
+    // `~A + B` if we are willing to invert all of the uses.
+    if (match(V, m_Sub(m_Value(A), m_Value())))
+      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth);
+
+    // LogicOps are special in that we canonicalize them at the cost of an
+    // instruction.
+    bool IsSelect = match(V, m_Select(m_Value(), m_Value(A), m_Value(B))) &&
+                    !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
+    // Selects/min/max with invertible operands are freely invertible
+    if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B))))
+      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth) &&
+             isFreeToInvertImpl(B, B->hasOneUse(), DoesConsume, Depth);
 
     return false;
   }
 
+  static bool isFreeToInvert(Value *V, bool WillInvertAllUses,
+                             bool &DoesConsume) {
+    DoesConsume = false;
+    return isFreeToInvertImpl(V, WillInvertAllUses, DoesConsume, /*Depth*/ 0);
+  }
+
+  static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
+    bool Unused;
+    return isFreeToInvert(V, WillInvertAllUses, Unused);
+  }
+
   /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
   /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
   /// NOTE: for Instructions only!
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 65cafbdbe61cff6..14c67e31fde6b93 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2224,12 +2224,17 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   // (~X) - (~Y) --> Y - X
   // This is placed after the other reassociations and explicitly excludes a
   // sub-of-sub pattern to avoid infinite looping.
-  if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
-      isFreeToInvert(Op1, Op1->hasOneUse()) &&
-      !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
-    Value *NotOp0 = Builder.CreateNot(Op0);
-    Value *NotOp1 = Builder.CreateNot(Op1);
-    return BinaryOperator::CreateSub(NotOp1, NotOp0);
+  {
+    // Need to ensure we can consume at least one of the `not` instructions,
+    // otherwise this can inf loop.
+    bool ConsumesOp0, ConsumesOp1;
+    if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
+        isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
+        (ConsumesOp0 || ConsumesOp1)) {
+      Value *NotOp0 = Builder.CreateNot(Op0);
+      Value *NotOp1 = Builder.CreateNot(Op1);
+      return BinaryOperator::CreateSub(NotOp1, NotOp0);
+    }
   }
 
   auto m_AddRdx = [](Value *&Vec) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 3802036d2a715ac..6df9bb06ca56d2e 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -1494,14 +1494,13 @@ define i8 @freeToInvert_two_minmax_ops_use3(i8 %x, i8 %y, i8 %z, i8 %w) {
 
 define i8 @sub_not_min_max(i8 %r, i8 %g, i8 %b) {
 ; CHECK-LABEL: @sub_not_min_max(
-; CHECK-NEXT:    [[NOTR:%.*]] = xor i8 [[R:%.*]], -1
 ; CHECK-NEXT:    [[NOTG:%.*]] = xor i8 [[G:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTG]])
 ; CHECK-NEXT:    [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTB]])
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
-; CHECK-NEXT:    [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
-; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[B]])
+; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
 ; CHECK-NEXT:    ret i8 [[CK]]
 ;
   %notr = xor i8 %r, -1
@@ -1523,9 +1522,9 @@ define i8 @sub_not_min_max_uses1(i8 %r, i8 %g, i8 %b) {
 ; CHECK-NEXT:    call void @use(i8 [[NOTG]])
 ; CHECK-NEXT:    [[NOTB:%.*]] = xor i8 [[B:%.*]], -1
 ; CHECK-NEXT:    call void @use(i8 [[NOTB]])
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]])
-; CHECK-NEXT:    [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]])
-; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[NOTR]], [[K]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R]], i8 [[G]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[B]])
+; CHECK-NEXT:    [[CK:%.*]] = sub i8 [[TMP2]], [[R]]
 ; CHECK-NEXT:    ret i8 [[CK]]
 ;
   %notr = xor i8 %r, -1

From c094bc867edb7da452490628457cf51b12ad6176 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Mon, 18 Sep 2023 14:59:58 -0500
Subject: [PATCH 4/7] [InstCombine] Add additional tests for free inversion;
 NFC

---
 .../Transforms/InstCombine/free-inversion.ll  | 520 ++++++++++++++++++
 1 file changed, 520 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/free-inversion.ll

diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
new file mode 100644
index 000000000000000..495028ccb970f21
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -0,0 +1,520 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i8 @llvm.smin.i8(i8, i8)
+declare i8 @llvm.umin.i8(i8, i8)
+declare i8 @llvm.smax.i8(i8, i8)
+declare i8 @llvm.umax.i8(i8, i8)
+
+declare void @use.i8(i8)
+
+define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = xor i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @xor_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = xor i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @xor_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[Y:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %b = select i1 %c, i8 %nx, i8 %y
+  %ab = xor i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = add i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = add i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @add_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = add i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @add_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @add_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, %a
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = add i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @sub_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = sub i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[A:%.*]], -1
+; CHECK-NEXT:    [[NOT_AB:%.*]] = add i8 [[B]], [[TMP1]]
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = sub i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    call void @use.i8(i8 [[NX]])
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  call void @use.i8(i8 %nx)
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = sub i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @ashr_1(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @ashr_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = ashr i8 [[B]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = ashr i8 %b, %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @ashr_2_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @ashr_2_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = ashr i8 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = ashr i8 %a, %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @select_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[TMP1]], 45
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i8 [[A]], i8 [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, 45
+  %a = xor i8 %aa, %nna
+  %ab = select i1 %cc, i8 %a, i8 %b
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @select_2(i1 %cc, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @select_2(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[NA:%.*]], 45
+; CHECK-NEXT:    [[BA:%.*]] = select i1 [[CC:%.*]], i8 [[B]], i8 [[A]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %a = xor i8 %na, 45
+  %ba = select i1 %cc, i8 %b, i8 %a
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i1 @select_logic_or_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
+; CHECK-LABEL: @select_logic_or_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[YY:%.*]] = icmp eq i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i1 [[NX]], i1 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i1 [[B]], i1 false
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i1 [[AB]], true
+; CHECK-NEXT:    ret i1 [[NOT_AB]]
+;
+  %nx = xor i1 %x, -1
+  %yy = icmp eq i8 %y, 123
+  %b = select i1 %c, i1 %nx, i1 %yy
+  %ab = select i1 %cc, i1 %b, i1 false
+  %not_ab = xor i1 %ab, -1
+  ret i1 %not_ab
+}
+
+define i1 @select_logic_and_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
+; CHECK-LABEL: @select_logic_and_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[YY:%.*]] = icmp eq i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i1 [[NX]], i1 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i1 true, i1 [[B]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i1 [[AB]], true
+; CHECK-NEXT:    ret i1 [[NOT_AB]]
+;
+  %nx = xor i1 %x, -1
+  %yy = icmp eq i8 %y, 123
+  %b = select i1 %c, i1 %nx, i1 %yy
+  %ab = select i1 %cc, i1 true, i1 %b
+  %not_ab = xor i1 %ab, -1
+  ret i1 %not_ab
+}
+
+define i8 @smin_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smin_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = add i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = add i8 %aa, %nna
+  %ab = call i8 @llvm.smin.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @smin_1_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smin_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A:%.*]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ab = call i8 @llvm.smin.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @umin_1_fail(i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[Y:%.*]]
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %b = select i1 %c, i8 %nx, i8 %y
+  %ba = call i8 @llvm.umin.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @smax_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smax_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = sub i8 %nna, %aa
+  %ab = call i8 @llvm.smax.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @smax_1_fail(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @smax_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    call void @use.i8(i8 [[YY]])
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
+; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    ret i8 [[NOT_AB]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  call void @use.i8(i8 %yy)
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %nna = xor i8 %na, -1
+  %a = sub i8 %nna, %aa
+  %ab = call i8 @llvm.smax.i8(i8 %a, i8 %b)
+  %not_ab = xor i8 %ab, -1
+  ret i8 %not_ab
+}
+
+define i8 @umax_1(i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umax_1(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  %ba = call i8 @llvm.umax.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @umax_1_fail(i8 %na, i1 %c, i8 %x, i8 %y) {
+; CHECK-LABEL: @umax_1_fail(
+; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
+; CHECK-NEXT:    call void @use.i8(i8 [[B]])
+; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    ret i8 [[NOT_BA]]
+;
+  %nx = xor i8 %x, -1
+  %yy = xor i8 %y, 123
+  %b = select i1 %c, i8 %nx, i8 %yy
+  call void @use.i8(i8 %b)
+  %ba = call i8 @llvm.umax.i8(i8 %b, i8 85)
+  %not_ba = xor i8 %ba, -1
+  ret i8 %not_ba
+}
+
+define i8 @sub_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[XX]], [[YY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = sub i8 %xx, %yy
+  ret i8 %r
+}
+
+define i8 @add_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[XX]], [[YY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = add i8 %xx, %yy
+  ret i8 %r
+}
+
+define i8 @xor_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @xor_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = add i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = add i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[XX]], [[YY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = add i8 %x, 123
+  %yy = add i8 %y, 45
+  %r = xor i8 %xx, %yy
+  ret i8 %r
+}
+
+define i8 @ashr_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @ashr_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = ashr i8 [[XX]], [[YY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = ashr i8 %xx, %yy
+  ret i8 %r
+}
+
+define i8 @select_both_freely_invertable_always(i1 %cc, i8 %x, i8 %y) {
+; CHECK-LABEL: @select_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], i8 [[XX]], i8 [[YY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = select i1 %cc, i8 %xx, i8 %yy
+  ret i8 %r
+}
+
+define i8 @umin_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[XX]], i8 [[YY]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = call i8 @llvm.umin.i8(i8 %xx, i8 %yy)
+  ret i8 %r
+}
+
+define i8 @umax_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @umax_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[XX]], i8 [[YY]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = call i8 @llvm.umax.i8(i8 %xx, i8 %yy)
+  ret i8 %r
+}
+
+define i8 @smin_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @smin_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[XX]], i8 [[YY]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = call i8 @llvm.smin.i8(i8 %xx, i8 %yy)
+  ret i8 %r
+}
+
+define i8 @smax_both_freely_invertable_always(i8 %x, i8 %y) {
+; CHECK-LABEL: @smax_both_freely_invertable_always(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 123
+; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[XX]], i8 [[YY]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %xx = xor i8 %x, 123
+  %yy = xor i8 %y, 45
+  %r = call i8 @llvm.smax.i8(i8 %xx, i8 %yy)
+  ret i8 %r
+}

From 1bb6313210cb2414fe3922173c7fe7f8de6851bc Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Mon, 18 Sep 2023 15:12:51 -0500
Subject: [PATCH 5/7] [InstCombine] add `getFreelyInverted` to perform folds
 for free inversion of op

With the current logic of `if (isFreeToInvert(Op)) return Not(Op)`,
it's fairly easy to either 1) cause regressions or 2) create infinite
loops if the folds we have for `Not(Op)` ever de-sync from the cases we
know are freely invertible.

This patch adds `getFreelyInverted`, which builds the freely inverted
op alongside the check for free inversion, to alleviate this problem.
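
For example (a minimal sketch taken from the `xor_1` test below), the
analysis and the construction of the inverted value now happen in one
pass:

    %nx = xor i8 %x, -1
    %yy = xor i8 %y, 123
    %b  = select i1 %c, i8 %nx, i8 %yy
    ; getFreelyInverted(%b, ...) builds ~%b directly:
    ;   %tmp   = xor i8 %y, -124
    ;   %b.not = select i1 %c, i8 %x, i8 %tmp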
---
 .../Transforms/InstCombine/InstCombiner.h     |  76 ++++---------
 .../InstCombine/InstCombineAndOrXor.cpp       |   5 +
 .../InstCombine/InstructionCombining.cpp      | 104 +++++++++++++++++
 .../Transforms/InstCombine/free-inversion.ll  | 105 +++++++-----------
 .../Transforms/InstCombine/icmp-of-or-x.ll    |   6 +-
 5 files changed, 176 insertions(+), 120 deletions(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index 3fc16d0e803a6db..94e53bc86d1dbb4 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -233,66 +233,38 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
                                                 PatternMatch::m_Value()));
   }
 
+  /// Return a nonnull value if V is free to invert, taking
+  /// WillInvertAllUses into account.
+  /// If Builder is nonnull, it will return a simplified ~V.
+  /// If Builder is null, it will return an arbitrary nonnull value (not
+  /// dereferenceable).
+  static Value *getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
+                                      BuilderTy *Builder, bool &DoesConsume,
+                                      unsigned Depth);
+
+  static Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
+                                  BuilderTy *Builder, bool &DoesConsume) {
+    DoesConsume = false;
+    return getFreelyInvertedImpl(V, WillInvertAllUses, Builder, DoesConsume,
+                                 /*Depth*/ 0);
+  }
+
+  static Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
+                                  BuilderTy *Builder) {
+    bool Unused;
+    return getFreelyInverted(V, WillInvertAllUses, Builder, Unused);
+  }
+
   /// Return true if the specified value is free to invert (apply ~ to).
   /// This happens in cases where the ~ can be eliminated.  If WillInvertAllUses
   /// is true, work under the assumption that the caller intends to remove all
   /// uses of V and only keep uses of ~V.
   ///
   /// See also: canFreelyInvertAllUsersOf()
-  static bool isFreeToInvertImpl(Value *V, bool WillInvertAllUses,
-                                 bool &DoesConsume, unsigned Depth) {
-    using namespace llvm::PatternMatch;
-    // ~(~(X)) -> X.
-    if (match(V, m_Not(m_Value()))) {
-      DoesConsume = true;
-      return true;
-    }
-
-    // Constants can be considered to be not'ed values.
-    if (match(V, m_AnyIntegralConstant()))
-      return true;
-
-    if (Depth++ >= MaxAnalysisRecursionDepth)
-      return false;
-
-    // The rest of the cases require that we invert all uses so don't bother
-    // doing the analysis if we know we can't use the result.
-    if (!WillInvertAllUses)
-      return false;
-
-    // Compares can be inverted if all of their uses are being modified to use
-    // the ~V.
-    if (isa<CmpInst>(V))
-      return true;
-
-    Value *A, *B;
-    // If `V` is of the form `A + B` then `-1 - V` can be folded into
-    // `~B - A` or `~A - B` if we are willing to invert all of the uses.
-    if (match(V, m_Add(m_Value(A), m_Value(B))))
-      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth) ||
-             isFreeToInvertImpl(B, B->hasOneUse(), DoesConsume, Depth);
-
-    // If `V` is of the form `A - B` then `-1 - V` can be folded into
-    // `~A + B` if we are willing to invert all of the uses.
-    if (match(V, m_Sub(m_Value(A), m_Value())))
-      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth);
-
-    // LogicOps are special in that we canonicalize them at the cost of an
-    // instruction.
-    bool IsSelect = match(V, m_Select(m_Value(), m_Value(A), m_Value(B))) &&
-                    !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
-    // Selects/min/max with invertible operands are freely invertible
-    if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B))))
-      return isFreeToInvertImpl(A, A->hasOneUse(), DoesConsume, Depth) &&
-             isFreeToInvertImpl(B, B->hasOneUse(), DoesConsume, Depth);
-
-    return false;
-  }
-
   static bool isFreeToInvert(Value *V, bool WillInvertAllUses,
                              bool &DoesConsume) {
-    DoesConsume = false;
-    return isFreeToInvertImpl(V, WillInvertAllUses, DoesConsume, /*Depth*/ 0);
+    return getFreelyInverted(V, WillInvertAllUses, /*Builder*/ nullptr,
+                             DoesConsume) != nullptr;
   }
 
   static bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 46af9bf5eed003a..0750c1529d0b3c2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -4374,6 +4374,11 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
   if (Instruction *NewXor = foldNotXor(I, Builder))
     return NewXor;
 
+  // TODO: Could handle multi-use better by checking if all uses of NotOp (other
+  // than I) can be inverted.
+  if (Value *R = getFreelyInverted(NotOp, NotOp->hasOneUse(), &Builder))
+    return replaceInstUsesWith(I, R);
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5859f58a9f462b0..0713be330fa04ba 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2106,6 +2106,110 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   return nullptr;
 }
 
+Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
+                                           BuilderTy *Builder,
+                                           bool &DoesConsume, unsigned Depth) {
+  static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
+  // ~(~(X)) -> X.
+  Value *A, *B;
+  if (match(V, m_Not(m_Value(A)))) {
+    DoesConsume = true;
+    return A;
+  }
+
+  Constant *C;
+  // Constants can be considered to be not'ed values.
+  if (match(V, m_ImmConstant(C)))
+    return ConstantExpr::getNot(C);
+
+  if (Depth++ >= MaxAnalysisRecursionDepth)
+    return nullptr;
+
+  // The rest of the cases require that we invert all uses so don't bother
+  // doing the analysis if we know we can't use the result.
+  if (!WillInvertAllUses)
+    return nullptr;
+
+  // Compares can be inverted if all of their uses are being modified to use
+  // the ~V.
+  if (auto *I = dyn_cast<CmpInst>(V)) {
+    if (Builder != nullptr)
+      return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
+                                I->getOperand(1));
+    return NonNull;
+  }
+
+  // If `V` is of the form `A + B` then `-1 - V` can be folded into
+  // `(-1 - B) - A` if we are willing to invert all of the uses.
+  if (match(V, m_Add(m_Value(A), m_Value(B)))) {
+    if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateSub(BV, A) : NonNull;
+    if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateSub(AV, B) : NonNull;
+    return nullptr;
+  }
+
+  // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
+  // into `A ^ B` if we are willing to invert all of the uses.
+  if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
+    if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateXor(A, BV) : NonNull;
+    if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateXor(AV, B) : NonNull;
+    return nullptr;
+  }
+
+  // If `V` is of the form `A - B` then `-1 - V` can be folded into
+  // `~A + B` if we are willing to invert all of the uses.
+  if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
+    if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateAdd(AV, B) : NonNull;
+    return nullptr;
+  }
+
+  // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
+  // into `A s>> B` if we are willing to invert all of the uses.
+  if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
+    if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                         DoesConsume, Depth))
+      return Builder ? Builder->CreateAShr(AV, B) : NonNull;
+    return nullptr;
+  }
+
+  Value *Cond;
+  // LogicOps are special in that we canonicalize them at the cost of an
+  // instruction.
+  bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
+                  !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
+  // Selects/min/max with invertible operands are freely invertible
+  if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
+    if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
+                               DoesConsume, Depth))
+      return nullptr;
+    if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
+                                            DoesConsume, Depth)) {
+      if (Builder != nullptr) {
+        Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
+                                            DoesConsume, Depth);
+        assert(NotB != nullptr &&
+               "Unable to build inverted value for known freely invertable op");
+        if (auto *II = dyn_cast<IntrinsicInst>(V))
+          return Builder->CreateBinaryIntrinsic(
+              getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
+        return Builder->CreateSelect(Cond, NotA, NotB);
+      }
+      return NonNull;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index 495028ccb970f21..d3b6a5801b53b6a 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -10,11 +10,9 @@ declare void @use.i8(i8)
 
 define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = xor i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -27,11 +25,9 @@ define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @xor_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[AB:%.*]] = xor i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -59,11 +55,9 @@ define i8 @xor_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @add_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = add i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = sub i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -76,11 +70,9 @@ define i8 @add_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @add_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @add_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[AB:%.*]] = add i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = sub i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -110,11 +102,9 @@ define i8 @add_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @sub_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
-; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -145,11 +135,10 @@ define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_fail(
 ; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
 ; CHECK-NEXT:    call void @use.i8(i8 [[NX]])
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[B]], -1
-; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = add i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -163,11 +152,9 @@ define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @ashr_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = ashr i8 [[B]], [[A:%.*]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = ashr i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -197,13 +184,11 @@ define i8 @ashr_2_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @select_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[NA:%.*]], [[AA:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[TMP1]], 45
-; CHECK-NEXT:    [[AB:%.*]] = select i1 [[CC:%.*]], i8 [[A]], i8 [[B]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[TMP1]], -46
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP3]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = select i1 [[CC:%.*]], i8 [[TMP2]], i8 [[TMP4]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -218,12 +203,10 @@ define i8 @select_1(i1 %cc, i8 %na, i8 %aa, i1 %c, i8 %x, i8 %y) {
 
 define i8 @select_2(i1 %cc, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @select_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[NA:%.*]], 45
-; CHECK-NEXT:    [[BA:%.*]] = select i1 [[CC:%.*]], i8 [[B]], i8 [[A]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[NA:%.*]], -46
+; CHECK-NEXT:    [[NOT_BA:%.*]] = select i1 [[CC:%.*]], i8 [[TMP2]], i8 [[TMP3]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -271,13 +254,10 @@ define i1 @select_logic_and_fail(i1 %cc, i1 %c, i1 %x, i8 %y) {
 
 define i8 @smin_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @smin_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[NNA]], [[AA:%.*]]
-; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smin.i8(i8 [[A]], i8 [[B]])
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP2]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[TMP3]])
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -324,13 +304,10 @@ define i8 @umin_1_fail(i1 %c, i8 %x, i8 %y) {
 
 define i8 @smax_1(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @smax_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[NNA:%.*]] = xor i8 [[NA:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = sub i8 [[NNA]], [[AA:%.*]]
-; CHECK-NEXT:    [[AB:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[AB]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[NA:%.*]], [[AA:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP2]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP1]], i8 [[TMP3]])
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
@@ -368,11 +345,9 @@ define i8 @smax_1_fail(i8 %aa, i8 %na, i1 %c, i8 %x, i8 %y) {
 
 define i8 @umax_1(i8 %na, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_1(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[BA:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 85)
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[BA]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = call i8 @llvm.umin.i8(i8 [[TMP2]], i8 -86)
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 839aa98a8b24e2f..4b8df439b846fb6 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -121,9 +121,9 @@ define i1 @or_ne_notY_eq_1s(i8 %x, i8 %y) {
 
 define i1 @or_ne_notY_eq_1s_fail_bad_not(i8 %x, i8 %y) {
 ; CHECK-LABEL: @or_ne_notY_eq_1s_fail_bad_not(
-; CHECK-NEXT:    [[NY:%.*]] = xor i8 [[Y:%.*]], -2
-; CHECK-NEXT:    [[OR:%.*]] = or i8 [[NY]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[OR]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP2]], -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %ny = xor i8 %y, -2

From 4e122ebd9d0aa0ee853321534381573a671ce533 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 18 Nov 2023 00:17:02 -0600
Subject: [PATCH 6/7] [InstCombine] Replace `isFreeToInvert` + `CreateNot` with
 `getFreelyInverted`

This is nearly NFC; the only potential change is the order in which
values are created and how they are named.

Otherwise it is a slight speedup/simplification, as it avoids going
through the `getFreelyInverted` recursive logic twice just to simplify
the extra `not` op.
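
For example (a minimal sketch), in the `(~X) - (~Y) --> Y - X` fold in
visitSub:

    %nx = xor i8 %x, -1
    %y1 = add i8 %y, 1
    %r  = sub i8 %nx, %y1
    ; Before: CreateNot emitted `xor i8 %y1, -1` and a later fold had to
    ; re-run the inversion logic to simplify it. Now getFreelyInverted
    ; directly yields `sub i8 -2, %y` for ~%y1 and `%x` for ~%nx, giving:
    ;   %t = sub i8 -2, %y
    ;   %r = sub i8 %t, %x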
---
 .../InstCombine/InstCombineAddSub.cpp         |  6 +-
 .../InstCombine/InstCombineAndOrXor.cpp       | 56 ++++----------
 .../InstCombine/InstCombineCalls.cpp          | 12 +--
 .../InstCombine/InstCombineCompares.cpp       | 21 +++---
 .../InstCombine/InstCombineSelect.cpp         | 20 ++---
 .../InstCombine/demorgan-sink-not-into-xor.ll | 74 +++++++++----------
 .../Transforms/InstCombine/free-inversion.ll  |  8 +-
 .../Transforms/InstCombine/icmp-mul-zext.ll   |  2 +-
 .../InstCombine/minmax-intrinsics.ll          |  2 +-
 9 files changed, 90 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 14c67e31fde6b93..e38ea7f62b9e6aa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2231,8 +2231,10 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
         isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
         (ConsumesOp0 || ConsumesOp1)) {
-      Value *NotOp0 = Builder.CreateNot(Op0);
-      Value *NotOp1 = Builder.CreateNot(Op1);
+      Value *NotOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
+      Value *NotOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
+      assert(NotOp0 != nullptr && NotOp1 != nullptr &&
+             "isFreeToInvert desynced with getFreelyInverted");
       return BinaryOperator::CreateSub(NotOp1, NotOp0);
     }
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0750c1529d0b3c2..756dedbb5b248a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2503,16 +2503,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(Op1, B);
 
     // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
-    if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
-      if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
-        if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
-          return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));
+    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+        match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) {
+      Value *NotC = Op1->hasOneUse()
+                        ? Builder.CreateNot(C)
+                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
+      if (NotC != nullptr)
+        return BinaryOperator::CreateAnd(Op0, NotC);
+    }
 
     // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
-    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
-      if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
-        if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse()))
-          return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));
+    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))) &&
+        match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) {
+      Value *NotC = Op0->hasOneUse()
+                        ? Builder.CreateNot(C)
+                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
+      if (NotC != nullptr)
+        return BinaryOperator::CreateAnd(Op1, NotC);
+    }
 
     // (A | B) & (~A ^ B) -> A & B
     // (A | B) & (B ^ ~A) -> A & B
@@ -3988,26 +3996,6 @@ static Instruction *visitMaskedMerge(BinaryOperator &I,
   return nullptr;
 }
 
-// Transform
-//   ~(x ^ y)
-// into:
-//   (~x) ^ y
-// or into
-//   x ^ (~y)
-static Instruction *sinkNotIntoXor(BinaryOperator &I, Value *X, Value *Y,
-                                   InstCombiner::BuilderTy &Builder) {
-  // We only want to do the transform if it is free to do.
-  if (InstCombiner::isFreeToInvert(X, X->hasOneUse())) {
-    // Ok, good.
-  } else if (InstCombiner::isFreeToInvert(Y, Y->hasOneUse())) {
-    std::swap(X, Y);
-  } else
-    return nullptr;
-
-  Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
-  return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
-}
-
 static Instruction *foldNotXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
   Value *X, *Y;
@@ -4016,9 +4004,6 @@ static Instruction *foldNotXor(BinaryOperator &I,
   if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
     return nullptr;
 
-  if (Instruction *NewXor = sinkNotIntoXor(I, X, Y, Builder))
-    return NewXor;
-
   auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
     return A == C || A == D || B == C || B == D;
   };
@@ -4316,15 +4301,6 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
   // ~max(~X, Y) --> min(X, ~Y)
   auto *II = dyn_cast<IntrinsicInst>(NotOp);
   if (II && II->hasOneUse()) {
-    if (match(NotOp, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
-        isFreeToInvert(X, X->hasOneUse()) &&
-        isFreeToInvert(Y, Y->hasOneUse())) {
-      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
-      Value *NotX = Builder.CreateNot(X);
-      Value *NotY = Builder.CreateNot(Y);
-      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
-      return replaceInstUsesWith(I, InvMaxMin);
-    }
     if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
       Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
       Value *NotY = Builder.CreateNot(Y);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cbfebf0102876..f8346cd03849acf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1698,12 +1698,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
       Value *A;
       if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
-          !isFreeToInvert(A, A->hasOneUse()) &&
-          isFreeToInvert(Y, Y->hasOneUse())) {
-        Value *NotY = Builder.CreateNot(Y);
-        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
-        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
-        return BinaryOperator::CreateNot(InvMaxMin);
+          !isFreeToInvert(A, A->hasOneUse())) {
+        if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
+          Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+          Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
+          return BinaryOperator::CreateNot(InvMaxMin);
+        }
       }
       return nullptr;
     };
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 783668db2da7670..b6e59f707e78268 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3281,10 +3281,12 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
   // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
   // Example: are all elements equal? --> are zero elements not equal?
   // TODO: Try harder to reduce compare of 2 freely invertible operands?
-  if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
-      isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
-    Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), DstType);
-    return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
+  if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse()) {
+    if (Value *NotBCSrcOp =
+            getFreelyInverted(BCSrcOp, BCSrcOp->hasOneUse(), &Builder)) {
+      Value *Cast = Builder.CreateBitCast(NotBCSrcOp, DstType);
+      return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
+    }
   }
 
   // If this is checking if all elements of an extended vector are clear or not,
@@ -4549,14 +4551,13 @@ static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
 
   if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
     // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
-    if (IC.isFreeToInvert(Op1, Op1->hasOneUse()))
-      return new ICmpInst(Pred,
-                          IC.Builder.CreateAnd(A, IC.Builder.CreateNot(Op1)),
+    if (Value *NotOp1 =
+            IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder))
+      return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1),
                           Constant::getNullValue(Op1->getType()));
     // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X  is freely invertible.
-    if (IC.isFreeToInvert(A, A->hasOneUse()))
-      return new ICmpInst(Pred,
-                          IC.Builder.CreateOr(Op1, IC.Builder.CreateNot(A)),
+    if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder))
+      return new ICmpInst(Pred, IC.Builder.CreateOr(Op1, NotA),
                           Constant::getAllOnesValue(Op1->getType()));
   }
   return nullptr;
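
A hand-written illustration of the first rewrite above (with %y freely
invertible because it is itself a `not`; names are illustrative):

  %ny  = xor i8 %t, -1
  %or  = or i8 %x, %ny
  %cmp = icmp eq i8 %or, %ny
  ; -> %and = and i8 %x, %t
  ;    %cmp = icmp eq i8 %and, 0

since (X | Y) == Y holds exactly when X & ~Y == 0.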
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 71c2d68881441ac..2dda46986f0fd08 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3075,18 +3075,18 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
     return SelectInst::Create(TrueVal, OrV, Zero);
   }
   // select (c & b), a, b -> select b, (select ~c, true, a), false
-  if (match(CondVal, m_OneUse(m_c_And(m_Value(C), m_Specific(FalseVal)))) &&
-      isFreeToInvert(C, C->hasOneUse())) {
-    Value *NotC = Builder.CreateNot(C);
-    Value *OrV = Builder.CreateSelect(NotC, One, TrueVal);
-    return SelectInst::Create(FalseVal, OrV, Zero);
+  if (match(CondVal, m_OneUse(m_c_And(m_Value(C), m_Specific(FalseVal))))) {
+    if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {
+      Value *OrV = Builder.CreateSelect(NotC, One, TrueVal);
+      return SelectInst::Create(FalseVal, OrV, Zero);
+    }
   }
   // select (a | c), a, b -> select a, true, (select ~c, b, false)
-  if (match(CondVal, m_OneUse(m_c_Or(m_Specific(TrueVal), m_Value(C)))) &&
-      isFreeToInvert(C, C->hasOneUse())) {
-    Value *NotC = Builder.CreateNot(C);
-    Value *AndV = Builder.CreateSelect(NotC, FalseVal, Zero);
-    return SelectInst::Create(TrueVal, One, AndV);
+  if (match(CondVal, m_OneUse(m_c_Or(m_Specific(TrueVal), m_Value(C))))) {
+    if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {
+      Value *AndV = Builder.CreateSelect(NotC, FalseVal, Zero);
+      return SelectInst::Create(TrueVal, One, AndV);
+    }
   }
   // select (c & ~b), a, b -> select b, true, (select c, a, false)
   if (match(CondVal,
diff --git a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
index 0ea645561696a50..b89cbefb0a70340 100644
--- a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
+++ b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name V
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 ; https://bugs.llvm.org/show_bug.cgi?id=38446
@@ -22,10 +22,10 @@ declare i1 @gen1()
 
 define i1 @positive_easyinvert(i16 %x, i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], -1
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[VTMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = icmp slt i16 %x, 0
   %tmp2 = icmp slt i8 %y, 0
@@ -36,24 +36,24 @@ define i1 @positive_easyinvert(i16 %x, i8 %y) {
 
 define i1 @positive_easyinvert0(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert0(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[TMP1]], [[VTMP1]]
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = call i1 @gen1()
-  %tmp2 = icmp slt i8 %y, 0
-  %tmp3 = xor i1 %tmp2, %tmp1
+  %cond = icmp slt i8 %y, 0
+  %tmp3 = xor i1 %cond, %tmp1
   %tmp4 = xor i1 %tmp3, true
   ret i1 %tmp4
 }
 
 define i1 @positive_easyinvert1(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert1(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[VTMP1]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = call i1 @gen1()
   %tmp2 = icmp slt i8 %y, 0
@@ -70,12 +70,12 @@ declare void @use1(i1)
 
 define i1 @oneuse_easyinvert_0(i8 %y) {
 ; CHECK-LABEL: @oneuse_easyinvert_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
-; CHECK-NEXT:    call void @use1(i1 [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP3]], true
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT:    [[VTMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT:    call void @use1(i1 [[VTMP2]])
+; CHECK-NEXT:    [[VTMP3:%.*]] = xor i1 [[VTMP1]], [[VTMP2]]
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[VTMP3]], true
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = call i1 @gen1()
   %tmp2 = icmp slt i8 %y, 0
@@ -87,12 +87,12 @@ define i1 @oneuse_easyinvert_0(i8 %y) {
 
 define i1 @oneuse_easyinvert_1(i8 %y) {
 ; CHECK-LABEL: @oneuse_easyinvert_1(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    call void @use1(i1 [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP3]], true
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT:    [[VTMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT:    [[VTMP3:%.*]] = xor i1 [[VTMP1]], [[VTMP2]]
+; CHECK-NEXT:    call void @use1(i1 [[VTMP3]])
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[VTMP3]], true
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = call i1 @gen1()
   %tmp2 = icmp slt i8 %y, 0
@@ -104,13 +104,13 @@ define i1 @oneuse_easyinvert_1(i8 %y) {
 
 define i1 @oneuse_easyinvert_2(i8 %y) {
 ; CHECK-LABEL: @oneuse_easyinvert_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
-; CHECK-NEXT:    call void @use1(i1 [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    call void @use1(i1 [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP3]], true
-; CHECK-NEXT:    ret i1 [[TMP4]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT:    [[VTMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT:    call void @use1(i1 [[VTMP2]])
+; CHECK-NEXT:    [[VTMP3:%.*]] = xor i1 [[VTMP1]], [[VTMP2]]
+; CHECK-NEXT:    call void @use1(i1 [[VTMP3]])
+; CHECK-NEXT:    [[VTMP4:%.*]] = xor i1 [[VTMP3]], true
+; CHECK-NEXT:    ret i1 [[VTMP4]]
 ;
   %tmp1 = call i1 @gen1()
   %tmp2 = icmp slt i8 %y, 0
@@ -128,9 +128,9 @@ define i1 @oneuse_easyinvert_2(i8 %y) {
 ; Not easily invertible.
 define i32 @negative(i32 %x, i32 %y) {
 ; CHECK-LABEL: @negative(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], -1
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[VTMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[VTMP2:%.*]] = xor i32 [[VTMP1]], -1
+; CHECK-NEXT:    ret i32 [[VTMP2]]
 ;
   %tmp1 = xor i32 %x, %y
   %tmp2 = xor i32 %tmp1, -1
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index d3b6a5801b53b6a..c6fe6a21cb91759 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -11,8 +11,8 @@ declare void @use.i8(i8)
 define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
-; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
-; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_BA:%.*]] = xor i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_BA]]
 ;
   %nx = xor i8 %x, -1
@@ -26,8 +26,8 @@ define i8 @xor_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 define i8 @xor_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @xor_2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
-; CHECK-NEXT:    [[B_NOT:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
-; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = xor i8 [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
index 74b8bfab82ed48e..095ac5b27f59635 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
@@ -131,7 +131,7 @@ define i1 @PR46561(i1 %a, i1 %x, i1 %y, i8 %z) {
 ; CHECK-NEXT:    [[MULBOOL:%.*]] = and i1 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[TMP1]], [[MULBOOL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[MULBOOL]], [[TMP1]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[P:%.*]] = phi i1 [ [[TMP2]], [[COND_TRUE]] ], [ false, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 6df9bb06ca56d2e..4ff5c714bcd4516 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -1263,7 +1263,7 @@ define i8 @freeToInvert(i8 %x, i8 %y, i8 %z) {
 ; CHECK-NEXT:    call void @use(i8 [[NY]])
 ; CHECK-NEXT:    call void @use(i8 [[NZ]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]])
-; CHECK-NEXT:    [[NOT:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[Z]])
+; CHECK-NEXT:    [[NOT:%.*]] = call i8 @llvm.smax.i8(i8 [[Z]], i8 [[TMP1]])
 ; CHECK-NEXT:    ret i8 [[NOT]]
 ;
   %nx = xor i8 %x, -1

>From 8772bb06952d4ada8377a9e4ce67ef2db0cf18b3 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 18 Nov 2023 23:36:31 -0600
Subject: [PATCH 7/7] [InstCombine] Add transform for `(~X + ~Y)` -> `-2 - Y -
 X`

Proof: https://alive2.llvm.org/ce/z/36FySK
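
The arithmetic: ~V == -V - 1, so ~X + ~Y == (-X - 1) + (-Y - 1) ==
-2 - Y - X. A minimal hand-written IR example (names are illustrative):

  %nx = xor i8 %x, -1
  %ny = xor i8 %y, -1
  %r  = add i8 %nx, %ny
  ; -> %t = sub i8 -2, %y
  ;    %r = sub i8 %t, %x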
---
 .../InstCombine/InstCombineAddSub.cpp         | 23 +++++++++++++++++++
 .../Transforms/InstCombine/free-inversion.ll  |  9 ++++----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index e38ea7f62b9e6aa..5bd6002b7670102 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1662,6 +1662,29 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
                                     m_c_UMin(m_Deferred(A), m_Deferred(B))))))
     return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, A, B, &I);
 
+  // (~X) + (~Y) --> (-2 - Y) - X
+  {
+    auto m_AddSubInvertable = m_CombineOr(m_Add(m_Value(), m_ImmConstant()),
+                                          m_Sub(m_ImmConstant(), m_Value()));
+    // To actually save instructions, we must either consume both the LHS
+    // and RHS (i.e., they each have a `not`), or consume one `not` and have
+    // the other operand in a form that can combine with the `-2 - Expr`.
+    bool ConsumesLHS, ConsumesRHS;
+    if (isFreeToInvert(LHS, LHS->hasOneUse(), ConsumesLHS) &&
+        isFreeToInvert(RHS, RHS->hasOneUse(), ConsumesRHS) &&
+        (ConsumesLHS || ConsumesRHS) &&
+        (ConsumesLHS || match(LHS, m_AddSubInvertable)) &&
+        (ConsumesRHS || match(RHS, m_AddSubInvertable))) {
+      Value *NotLHS = getFreelyInverted(LHS, LHS->hasOneUse(), &Builder);
+      Value *NotRHS = getFreelyInverted(RHS, RHS->hasOneUse(), &Builder);
+      assert(NotLHS != nullptr && NotRHS != nullptr &&
+             "isFreeToInvert desynced with getFreelyInverted");
+      Value *RHSMinus2 =
+          Builder.CreateSub(ConstantInt::get(RHS->getType(), -2), NotRHS);
+      return BinaryOperator::CreateSub(RHSMinus2, NotLHS);
+    }
+  }
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
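
(Illustration, not part of the patch; it assumes the earlier patches in
this series let `add X, imm` invert freely.) In the mixed case only one
side carries a real `not`, and the other must invert into something the
`-2 - Expr` can later combine with:

  %nx = xor i8 %x, -1
  %yc = add i8 %y, 10
  %r  = add i8 %nx, %yc
  ; getFreelyInverted(%yc) is (sub i8 -11, %y), so this emits
  ;   %i = sub i8 -11, %y
  ;   %t = sub i8 -2, %i
  ;   %r = sub i8 %t, %x
  ; which later folds reduce to (%y + 9) - %x.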
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index c6fe6a21cb91759..ea5894597997bc1 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -117,11 +117,10 @@ define i8 @sub_1(i8 %a, i1 %c, i8 %x, i8 %y) {
 
 define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_2(
-; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
-; CHECK-NEXT:    [[YY:%.*]] = xor i8 [[Y:%.*]], 123
-; CHECK-NEXT:    [[B:%.*]] = select i1 [[C:%.*]], i8 [[NX]], i8 [[YY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[A:%.*]], -1
-; CHECK-NEXT:    [[NOT_AB:%.*]] = add i8 [[B]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = sub i8 -2, [[TMP3]]
 ; CHECK-NEXT:    ret i8 [[NOT_AB]]
 ;
   %nx = xor i8 %x, -1


