[llvm] [InstCombine] Combine or-disjoint (and->mul), (and->mul) to and->mul (PR #136013)

Thu Apr 17 13:27:29 PDT 2025

https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/136013

>From 0a3e0c1d5ec047f23afd89f6ec9958f80c1b30af Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 9 Apr 2025 14:44:11 -0700
Subject: [PATCH 1/7] [InstCombine] Combine and->cmp->sel->or-disjoint into
 and->mul

Change-Id: Id45315f1e5f71077800d3a8141b85bb3b5d8f38a
---
 .../InstCombine/InstCombineAndOrXor.cpp       |  42 +++++++
 llvm/test/Transforms/InstCombine/or.ll        | 114 +++++++++++++++++-
 2 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6cc241781d112..6dc4b97686f97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3643,6 +3643,48 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
             foldAddLikeCommutative(I.getOperand(1), I.getOperand(0),
                                    /*NSW=*/true, /*NUW=*/true))
       return R;
+
+    Value *Cond0 = nullptr, *Cond1 = nullptr;
+    ConstantInt *Op0True = nullptr, *Op0False = nullptr;
+    ConstantInt *Op1True = nullptr, *Op1False = nullptr;
+
+    //  (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
+    if (match(I.getOperand(0), m_Select(m_Value(Cond0), m_ConstantInt(Op0True),
+                                        m_ConstantInt(Op0False))) &&
+        match(I.getOperand(1), m_Select(m_Value(Cond1), m_ConstantInt(Op1True),
+                                        m_ConstantInt(Op1False))) &&
+        Op0True->isZero() && Op1True->isZero() &&
+        Op0False->getValue().tryZExtValue() &&
+        Op1False->getValue().tryZExtValue()) {
+      CmpPredicate Pred0, Pred1;
+      Value *CmpOp0 = nullptr, *CmpOp1 = nullptr;
+      ConstantInt *Op0Cond = nullptr, *Op1Cond = nullptr;
+      if (match(Cond0,
+                m_c_ICmp(Pred0, m_Value(CmpOp0), m_ConstantInt(Op0Cond))) &&
+          match(Cond1,
+                m_c_ICmp(Pred1, m_Value(CmpOp1), m_ConstantInt(Op1Cond))) &&
+          Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ &&
+          Op0Cond->isZero() && Op1Cond->isZero()) {
+        Value *AndSrc0 = nullptr, *AndSrc1 = nullptr;
+        ConstantInt *BitSel0 = nullptr, *BitSel1 = nullptr;
+        if (match(CmpOp0, m_And(m_Value(AndSrc0), m_ConstantInt(BitSel0))) &&
+            match(CmpOp1, m_And(m_Value(AndSrc1), m_ConstantInt(BitSel1))) &&
+            AndSrc0 == AndSrc1 && BitSel0->getValue().tryZExtValue() &&
+            BitSel1->getValue().tryZExtValue()) {
+          unsigned Out0 = Op0False->getValue().getZExtValue();
+          unsigned Out1 = Op1False->getValue().getZExtValue();
+          unsigned Sel0 = BitSel0->getValue().getZExtValue();
+          unsigned Sel1 = BitSel1->getValue().getZExtValue();
+          if (!(Out0 % Sel0) && !(Out1 % Sel1) &&
+              ((Out0 / Sel0) == (Out1 / Sel1))) {
+            auto NewAnd = Builder.CreateAnd(
+                AndSrc0, ConstantInt::get(AndSrc0->getType(), Sel0 + Sel1));
+            return BinaryOperator::CreateMul(
+                NewAnd, ConstantInt::get(NewAnd->getType(), (Out1 / Sel1)));
+          }
+        }
+      }
+    }
   }
 
   Value *X, *Y;
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 95f89e4ce11cd..f2b21ca966592 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -1281,10 +1281,10 @@ define <16 x i1> @test51(<16 x i1> %arg, <16 x i1> %arg1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i1> [[ARG:%.*]], <16 x i1> [[ARG1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:    ret <16 x i1> [[TMP3]]
 ;
-  %tmp = and <16 x i1> %arg, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
-  %tmp2 = and <16 x i1> %arg1, <i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>
-  %tmp3 = or <16 x i1> %tmp, %tmp2
-  ret <16 x i1> %tmp3
+  %temp = and <16 x i1> %arg, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+  %temp2 = and <16 x i1> %arg1, <i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %temp3 = or <16 x i1> %temp, %temp2
+  ret <16 x i1> %temp3
 }
 
 ; This would infinite loop because it reaches a transform
@@ -2035,3 +2035,109 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) {
   %or1 = or i32 %xor, %yy
   ret i32 %or1
 }
+
+define i32 @add_select_cmp_and1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 5
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 4
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 288
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and3(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[BITOP2:%.*]] = and i32 [[IN]], 4
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %temp = or disjoint i32 %sel0, %sel1
+  %bitop2 = and i32 %in, 4
+  %cmp2 = icmp eq i32 %bitop2, 0
+  %sel2 = select i1 %cmp2, i32 0, i32 288
+  %out = or disjoint i32 %temp, %sel2
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and4(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %temp = or disjoint i32 %sel0, %sel1
+  %bitop2 = and i32 %in, 4
+  %cmp2 = icmp eq i32 %bitop2, 0
+  %bitop3 = and i32 %in, 8
+  %cmp3 = icmp eq i32 %bitop3, 0
+  %sel2 = select i1 %cmp2, i32 0, i32 288
+  %sel3 = select i1 %cmp3, i32 0, i32 576
+  %temp2 = or disjoint i32 %sel2, %sel3
+  %out = or disjoint i32 %temp, %temp2
+  ret i32 %out
+}
+
+
+
+define i32 @add_select_cmp_and_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mismatch(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 288
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 3
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 288
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}

>From b5581cc434edf8cca6cd7d92ea921dc5a6315fd2 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 14 Apr 2025 12:23:06 -0700
Subject: [PATCH 2/7] Address review comments

Change-Id: I630d506375b0eb4b16dad1437bff2da357be2059
---
 llvm/include/llvm/Analysis/CmpInstAnalysis.h  |   4 +-
 llvm/lib/Analysis/CmpInstAnalysis.cpp         |  28 +++-
 .../InstCombine/InstCombineAndOrXor.cpp       |  81 +++++++-----
 llvm/test/Transforms/InstCombine/or.ll        | 124 +++++++++++++++++-
 4 files changed, 195 insertions(+), 42 deletions(-)

diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
index aeda58ac7535d..e8a9060b8e882 100644
--- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
@@ -105,8 +105,8 @@ namespace llvm {
   /// Unless \p AllowNonZeroC is true, C will always be 0.
   std::optional<DecomposedBitTest>
   decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
-                       bool LookThroughTrunc = true,
-                       bool AllowNonZeroC = false);
+                       bool LookThroughTrunc = true, bool AllowNonZeroC = false,
+                       bool LookThruBitSel = false);
 
   /// Decompose an icmp into the form ((X & Mask) pred C) if
   /// possible. Unless \p AllowNonZeroC is true, C will always be 0.
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 5c0d1dd1c74b0..ebbc71ee20ec9 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -75,11 +75,12 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy,
 
 std::optional<DecomposedBitTest>
 llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
-                           bool LookThruTrunc, bool AllowNonZeroC) {
+                           bool LookThruTrunc, bool AllowNonZeroC,
+                           bool LookThruBitSel) {
   using namespace PatternMatch;
 
   const APInt *OrigC;
-  if (!ICmpInst::isRelational(Pred) || !match(RHS, m_APIntAllowPoison(OrigC)))
+  if (!match(RHS, m_APIntAllowPoison(OrigC)))
     return std::nullopt;
 
   bool Inverted = false;
@@ -96,10 +97,27 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
     Pred = ICmpInst::getStrictPredicate(Pred);
   }
 
+  auto decomposeBitMask =
+      [LHS,
+       LookThruBitSel](CmpInst::Predicate Pred,
+                       const APInt *OrigC) -> std::optional<DecomposedBitTest> {
+    if (!LookThruBitSel)
+      return std::nullopt;
+
+    const APInt *AndC;
+    Value *AndVal;
+    std::optional<DecomposedBitTest> Result = std::nullopt;
+    if (match(LHS, m_And(m_Value(AndVal), m_APInt(AndC))))
+      Result = {AndVal /*X*/, Pred /*Pred*/, *AndC /*Mask*/, *OrigC /*C*/};
+
+    return Result;
+  };
+
   DecomposedBitTest Result;
+
   switch (Pred) {
   default:
-    llvm_unreachable("Unexpected predicate");
+    return decomposeBitMask(Pred, OrigC);
   case ICmpInst::ICMP_SLT: {
     // X < 0 is equivalent to (X & SignMask) != 0.
     if (C.isZero()) {
@@ -126,7 +144,7 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
       break;
     }
 
-    return std::nullopt;
+    return decomposeBitMask(Pred, OrigC);
   }
   case ICmpInst::ICMP_ULT:
     // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
@@ -145,7 +163,7 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
       break;
     }
 
-    return std::nullopt;
+    return decomposeBitMask(Pred, OrigC);
   }
 
   if (!AllowNonZeroC && !Result.C.isZero())
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6dc4b97686f97..4fc0df749a36d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3645,42 +3645,61 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
       return R;
 
     Value *Cond0 = nullptr, *Cond1 = nullptr;
-    ConstantInt *Op0True = nullptr, *Op0False = nullptr;
-    ConstantInt *Op1True = nullptr, *Op1False = nullptr;
+    const APInt *Op0True = nullptr, *Op0False = nullptr;
+    const APInt *Op1True = nullptr, *Op1False = nullptr;
 
     //  (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
-    if (match(I.getOperand(0), m_Select(m_Value(Cond0), m_ConstantInt(Op0True),
-                                        m_ConstantInt(Op0False))) &&
-        match(I.getOperand(1), m_Select(m_Value(Cond1), m_ConstantInt(Op1True),
-                                        m_ConstantInt(Op1False))) &&
-        Op0True->isZero() && Op1True->isZero() &&
-        Op0False->getValue().tryZExtValue() &&
-        Op1False->getValue().tryZExtValue()) {
+    if (match(I.getOperand(0),
+              m_Select(m_Value(Cond0), m_APInt(Op0True), m_APInt(Op0False))) &&
+        match(I.getOperand(1),
+              m_Select(m_Value(Cond1), m_APInt(Op1True), m_APInt(Op1False))) &&
+        Op0True->isZero() && Op1True->isZero()) {
       CmpPredicate Pred0, Pred1;
-      Value *CmpOp0 = nullptr, *CmpOp1 = nullptr;
-      ConstantInt *Op0Cond = nullptr, *Op1Cond = nullptr;
-      if (match(Cond0,
-                m_c_ICmp(Pred0, m_Value(CmpOp0), m_ConstantInt(Op0Cond))) &&
-          match(Cond1,
-                m_c_ICmp(Pred1, m_Value(CmpOp1), m_ConstantInt(Op1Cond))) &&
-          Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ &&
-          Op0Cond->isZero() && Op1Cond->isZero()) {
-        Value *AndSrc0 = nullptr, *AndSrc1 = nullptr;
-        ConstantInt *BitSel0 = nullptr, *BitSel1 = nullptr;
-        if (match(CmpOp0, m_And(m_Value(AndSrc0), m_ConstantInt(BitSel0))) &&
-            match(CmpOp1, m_And(m_Value(AndSrc1), m_ConstantInt(BitSel1))) &&
-            AndSrc0 == AndSrc1 && BitSel0->getValue().tryZExtValue() &&
-            BitSel1->getValue().tryZExtValue()) {
-          unsigned Out0 = Op0False->getValue().getZExtValue();
-          unsigned Out1 = Op1False->getValue().getZExtValue();
-          unsigned Sel0 = BitSel0->getValue().getZExtValue();
-          unsigned Sel1 = BitSel1->getValue().getZExtValue();
-          if (!(Out0 % Sel0) && !(Out1 % Sel1) &&
-              ((Out0 / Sel0) == (Out1 / Sel1))) {
+
+      if (ICmpInst *ICL = dyn_cast<ICmpInst>(Cond0);
+          ICmpInst *ICR = dyn_cast<ICmpInst>(Cond1)) {
+        auto LHSDecompose =
+            decomposeBitTestICmp(ICL->getOperand(0), ICL->getOperand(1),
+                                 ICL->getPredicate(), true, true, true);
+        auto RHSDecompose =
+            decomposeBitTestICmp(ICR->getOperand(0), ICR->getOperand(1),
+                                 ICR->getPredicate(), true, true, true);
+        if (LHSDecompose && RHSDecompose &&
+            LHSDecompose->Pred == RHSDecompose->Pred &&
+            (LHSDecompose->Pred == ICmpInst::ICMP_EQ ||
+             LHSDecompose->Pred == ICmpInst::ICMP_NE) &&
+            ((LHSDecompose->Mask & RHSDecompose->Mask) ==
+             APInt::getZero(LHSDecompose->Mask.getBitWidth())) &&
+            LHSDecompose->C.isZero() && RHSDecompose->C.isZero() &&
+            !RHSDecompose->Mask.isNegative() &&
+            !LHSDecompose->Mask.isNegative() &&
+            RHSDecompose->Mask.isPowerOf2() &&
+            LHSDecompose->Mask.isPowerOf2()) {
+          std::pair<const APInt *, const APInt *> LHSInts;
+          std::pair<const APInt *, const APInt *> RHSInts;
+
+          if (LHSDecompose->Pred == ICmpInst::ICMP_EQ) {
+            LHSInts = {Op0False, Op0True};
+            RHSInts = {Op1False, Op1True};
+          } else {
+            LHSInts = {Op0True, Op0False};
+            RHSInts = {Op1True, Op1False};
+          }
+
+          if (!LHSInts.first->isNegative() && !RHSInts.first->isNegative() &&
+              LHSInts.second->isZero() && RHSInts.second->isZero() &&
+              LHSInts.first->urem(LHSDecompose->Mask).isZero() &&
+              RHSInts.first->urem(RHSDecompose->Mask).isZero() &&
+              LHSInts.first->udiv(LHSDecompose->Mask) ==
+                  RHSInts.first->udiv(RHSDecompose->Mask)) {
             auto NewAnd = Builder.CreateAnd(
-                AndSrc0, ConstantInt::get(AndSrc0->getType(), Sel0 + Sel1));
+                LHSDecompose->X,
+                ConstantInt::get(LHSDecompose->X->getType(),
+                                 (LHSDecompose->Mask + RHSDecompose->Mask)));
             return BinaryOperator::CreateMul(
-                NewAnd, ConstantInt::get(NewAnd->getType(), (Out1 / Sel1)));
+                NewAnd,
+                ConstantInt::get(NewAnd->getType(),
+                                 LHSInts.first->udiv(LHSDecompose->Mask)));
           }
         }
       }
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index f2b21ca966592..2d6a905ea520b 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2119,13 +2119,11 @@ define i32 @add_select_cmp_and4(i32 %in) {
   ret i32 %out
 }
 
-
-
 define i32 @add_select_cmp_and_mismatch(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and_mismatch(
 ; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
 ; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 3
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
 ; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
 ; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 288
@@ -2134,10 +2132,128 @@ define i32 @add_select_cmp_and_mismatch(i32 %in) {
 ;
   %bitop0 = and i32 %in, 1
   %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 3
+  %bitop1 = and i32 %in, 2
   %cmp1 = icmp eq i32 %bitop1, 0
   %sel0 = select i1 %cmp0, i32 0, i32 72
   %sel1 = select i1 %cmp1, i32 0, i32 288
   %out = or disjoint i32 %sel0, %sel1
   ret i32 %out
 }
+
+define i32 @add_select_cmp_and_negative(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_negative(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[IN]], 2
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 -144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, -2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 -144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_bitsel_overlap(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_bitsel_overlap(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 2
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 144
+; CHECK-NEXT:    ret i32 [[SEL0]]
+;
+  %bitop0 = and i32 %in, 2
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 144
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+; We cannot combine into and-mul, as %bitop1 may not be exactly 6
+
+define i32 @add_select_cmp_and_multbit_mask(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_multbit_mask(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 6
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 432
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 6
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 432
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+
+define <2 x i32> @add_select_cmp_vec(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw <2 x i32> [[TMP1]], splat (i32 72)
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 72>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 144, i32 144>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}
+
+define <2 x i32> @add_select_cmp_vec_poison(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec_poison(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
+; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], splat (i32 2)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
+; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 poison, i32 144>
+; CHECK-NEXT:    [[OUT:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> [[SEL1]], <2 x i32> <i32 72, i32 poison>
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 poison>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 poison, i32 144>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}
+
+define <2 x i32> @add_select_cmp_vec_nonunique(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec_nonunique(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], <i32 1, i32 2>
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
+; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], <i32 4, i32 8>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
+; CHECK-NEXT:    [[SEL0:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> zeroinitializer, <2 x i32> <i32 72, i32 144>
+; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 288, i32 576>
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint <2 x i32> [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 2>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 4, i32 8>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 144>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 288, i32 576>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}

>From 1475b40e802994e6e19b12bd7d86cb4b91d93700 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 15 Apr 2025 11:41:05 -0700
Subject: [PATCH 3/7] Review comments 2

Change-Id: I24786ee6dc53a33fc7afbd80d226cda4e4a4df03
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 49 ++++++++-----------
 llvm/test/Transforms/InstCombine/or.ll        | 25 +++++++++-
 2 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4fc0df749a36d..4bd32fba1b702 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3645,61 +3645,52 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
       return R;
 
     Value *Cond0 = nullptr, *Cond1 = nullptr;
-    const APInt *Op0True = nullptr, *Op0False = nullptr;
-    const APInt *Op1True = nullptr, *Op1False = nullptr;
+    const APInt *Op0Eq = nullptr, *Op0Ne = nullptr;
+    const APInt *Op1Eq = nullptr, *Op1Ne = nullptr;
 
     //  (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
     if (match(I.getOperand(0),
-              m_Select(m_Value(Cond0), m_APInt(Op0True), m_APInt(Op0False))) &&
+              m_Select(m_Value(Cond0), m_APInt(Op0Eq), m_APInt(Op0Ne))) &&
         match(I.getOperand(1),
-              m_Select(m_Value(Cond1), m_APInt(Op1True), m_APInt(Op1False))) &&
-        Op0True->isZero() && Op1True->isZero()) {
+              m_Select(m_Value(Cond1), m_APInt(Op1Eq), m_APInt(Op1Ne)))) {
       CmpPredicate Pred0, Pred1;
 
       if (ICmpInst *ICL = dyn_cast<ICmpInst>(Cond0);
           ICmpInst *ICR = dyn_cast<ICmpInst>(Cond1)) {
         auto LHSDecompose =
             decomposeBitTestICmp(ICL->getOperand(0), ICL->getOperand(1),
-                                 ICL->getPredicate(), true, true, true);
+                                 ICL->getPredicate(), true, false, true);
         auto RHSDecompose =
             decomposeBitTestICmp(ICR->getOperand(0), ICR->getOperand(1),
-                                 ICR->getPredicate(), true, true, true);
+                                 ICR->getPredicate(), true, false, true);
         if (LHSDecompose && RHSDecompose &&
+            LHSDecompose->X == RHSDecompose->X &&
             LHSDecompose->Pred == RHSDecompose->Pred &&
-            (LHSDecompose->Pred == ICmpInst::ICMP_EQ ||
-             LHSDecompose->Pred == ICmpInst::ICMP_NE) &&
-            ((LHSDecompose->Mask & RHSDecompose->Mask) ==
-             APInt::getZero(LHSDecompose->Mask.getBitWidth())) &&
-            LHSDecompose->C.isZero() && RHSDecompose->C.isZero() &&
+            (ICmpInst::isEquality(LHSDecompose->Pred)) &&
             !RHSDecompose->Mask.isNegative() &&
             !LHSDecompose->Mask.isNegative() &&
             RHSDecompose->Mask.isPowerOf2() &&
-            LHSDecompose->Mask.isPowerOf2()) {
+            LHSDecompose->Mask.isPowerOf2() &&
+            LHSDecompose->Mask != RHSDecompose->Mask) {
           std::pair<const APInt *, const APInt *> LHSInts;
           std::pair<const APInt *, const APInt *> RHSInts;
-
-          if (LHSDecompose->Pred == ICmpInst::ICMP_EQ) {
-            LHSInts = {Op0False, Op0True};
-            RHSInts = {Op1False, Op1True};
-          } else {
-            LHSInts = {Op0True, Op0False};
-            RHSInts = {Op1True, Op1False};
+          if (LHSDecompose->Pred == ICmpInst::ICMP_NE) {
+            std::swap(Op0Eq, Op0Ne);
+            std::swap(Op1Eq, Op1Ne);
           }
 
-          if (!LHSInts.first->isNegative() && !RHSInts.first->isNegative() &&
-              LHSInts.second->isZero() && RHSInts.second->isZero() &&
-              LHSInts.first->urem(LHSDecompose->Mask).isZero() &&
-              RHSInts.first->urem(RHSDecompose->Mask).isZero() &&
-              LHSInts.first->udiv(LHSDecompose->Mask) ==
-                  RHSInts.first->udiv(RHSDecompose->Mask)) {
+          if (!Op0Ne->isNegative() && !Op1Ne->isNegative() && Op0Eq->isZero() &&
+              Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
+              Op1Ne->urem(RHSDecompose->Mask).isZero() &&
+              Op0Ne->udiv(LHSDecompose->Mask) ==
+                  Op1Ne->udiv(RHSDecompose->Mask)) {
             auto NewAnd = Builder.CreateAnd(
                 LHSDecompose->X,
                 ConstantInt::get(LHSDecompose->X->getType(),
                                  (LHSDecompose->Mask + RHSDecompose->Mask)));
             return BinaryOperator::CreateMul(
-                NewAnd,
-                ConstantInt::get(NewAnd->getType(),
-                                 LHSInts.first->udiv(LHSDecompose->Mask)));
+                NewAnd, ConstantInt::get(NewAnd->getType(),
+                                         Op0Ne->udiv(LHSDecompose->Mask)));
           }
         }
       }
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 2d6a905ea520b..02548ac9e7883 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2119,8 +2119,8 @@ define i32 @add_select_cmp_and4(i32 %in) {
   ret i32 %out
 }
 
-define i32 @add_select_cmp_and_mismatch(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and_mismatch(
+define i32 @add_select_cmp_and_const_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_const_mismatch(
 ; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
 ; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
 ; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
@@ -2140,6 +2140,27 @@ define i32 @add_select_cmp_and_mismatch(i32 %in) {
   ret i32 %out
 }
 
+define i32 @add_select_cmp_and_value_mismatch(i32 %in, i32 %in1) {
+; CHECK-LABEL: @add_select_cmp_and_value_mismatch(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN1:%.*]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in1, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
 define i32 @add_select_cmp_and_negative(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and_negative(
 ; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1

>From 6867a8734f40a6df3dca2467d45949bb148e0c23 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 15 Apr 2025 15:19:28 -0700
Subject: [PATCH 4/7] Remove unused variables

Change-Id: I897641e07a0e035c106f8d0d018f8a1b7a92aa6b
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4bd32fba1b702..a550099be5459 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3672,8 +3672,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
             RHSDecompose->Mask.isPowerOf2() &&
             LHSDecompose->Mask.isPowerOf2() &&
             LHSDecompose->Mask != RHSDecompose->Mask) {
-          std::pair<const APInt *, const APInt *> LHSInts;
-          std::pair<const APInt *, const APInt *> RHSInts;
           if (LHSDecompose->Pred == ICmpInst::ICMP_NE) {
             std::swap(Op0Eq, Op0Ne);
             std::swap(Op1Eq, Op1Ne);

>From 71ea9f61d858c40f7a204957d4cea9f98504a5f0 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 15 Apr 2025 16:09:23 -0700
Subject: [PATCH 5/7] Review comments 3

Change-Id: I2a995aa22358d9d532a7f839dd64d8c8633990fa
---
 llvm/include/llvm/Analysis/CmpInstAnalysis.h  | 12 ++--
 llvm/lib/Analysis/CmpInstAnalysis.cpp         | 52 +++++++-------
 .../InstCombine/InstCombineAndOrXor.cpp       | 72 +++++++++----------
 llvm/test/Transforms/InstCombine/or.ll        | 47 ++++++++++--
 4 files changed, 112 insertions(+), 71 deletions(-)

diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
index e8a9060b8e882..a796a38feff88 100644
--- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
@@ -95,24 +95,28 @@ namespace llvm {
   /// Represents the operation icmp (X & Mask) pred C, where pred can only be
   /// eq or ne.
   struct DecomposedBitTest {
-    Value *X;
+    Value *X = nullptr;
     CmpInst::Predicate Pred;
     APInt Mask;
     APInt C;
   };
 
   /// Decompose an icmp into the form ((X & Mask) pred C) if possible.
-  /// Unless \p AllowNonZeroC is true, C will always be 0.
+  /// Unless \p AllowNonZeroC is true, C will always be 0. If \p
+  /// DecomposeBitMask is specified, then, for equality predicates, this will
+  /// decompose bitmasking (e.g. implemented via `and`).
   std::optional<DecomposedBitTest>
   decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
                        bool LookThroughTrunc = true, bool AllowNonZeroC = false,
-                       bool LookThruBitSel = false);
+                       bool DecomposeBitMask = false);
 
   /// Decompose an icmp into the form ((X & Mask) pred C) if
   /// possible. Unless \p AllowNonZeroC is true, C will always be 0.
+  /// If \p DecomposeBitMask is specified, then, for equality predicates, this
+  /// will decompose bitmasking (e.g. implemented via `and`).
   std::optional<DecomposedBitTest>
   decomposeBitTest(Value *Cond, bool LookThroughTrunc = true,
-                   bool AllowNonZeroC = false);
+                   bool AllowNonZeroC = false, bool DecomposeBitMask = false);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index ebbc71ee20ec9..1819a93be0bfd 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -76,11 +76,12 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy,
 std::optional<DecomposedBitTest>
 llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
                            bool LookThruTrunc, bool AllowNonZeroC,
-                           bool LookThruBitSel) {
+                           bool DecomposeBitMask) {
   using namespace PatternMatch;
 
   const APInt *OrigC;
-  if (!match(RHS, m_APIntAllowPoison(OrigC)))
+  if ((ICmpInst::isEquality(Pred) && !DecomposeBitMask) ||
+      !match(RHS, m_APIntAllowPoison(OrigC)))
     return std::nullopt;
 
   bool Inverted = false;
@@ -97,27 +98,11 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
     Pred = ICmpInst::getStrictPredicate(Pred);
   }
 
-  auto decomposeBitMask =
-      [LHS,
-       LookThruBitSel](CmpInst::Predicate Pred,
-                       const APInt *OrigC) -> std::optional<DecomposedBitTest> {
-    if (!LookThruBitSel)
-      return std::nullopt;
-
-    const APInt *AndC;
-    Value *AndVal;
-    std::optional<DecomposedBitTest> Result = std::nullopt;
-    if (match(LHS, m_And(m_Value(AndVal), m_APInt(AndC))))
-      Result = {AndVal /*X*/, Pred /*Pred*/, *AndC /*Mask*/, *OrigC /*C*/};
-
-    return Result;
-  };
-
   DecomposedBitTest Result;
 
   switch (Pred) {
   default:
-    return decomposeBitMask(Pred, OrigC);
+    return std::nullopt;
   case ICmpInst::ICMP_SLT: {
     // X < 0 is equivalent to (X & SignMask) != 0.
     if (C.isZero()) {
@@ -144,9 +129,9 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
       break;
     }
 
-    return decomposeBitMask(Pred, OrigC);
+    return std::nullopt;
   }
-  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_ULT: {
     // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
     if (C.isPowerOf2()) {
       Result.Mask = -C;
@@ -163,7 +148,20 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
       break;
     }
 
-    return decomposeBitMask(Pred, OrigC);
+    return std::nullopt;
+  }
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_NE: {
+    assert(DecomposeBitMask);
+    const APInt *AndC;
+    Value *AndVal;
+    if (match(LHS, m_And(m_Value(AndVal), m_APInt(AndC)))) {
+      Result = {AndVal /*X*/, Pred /*Pred*/, *AndC /*Mask*/, *OrigC /*C*/};
+      break;
+    }
+
+    return std::nullopt;
+  }
   }
 
   if (!AllowNonZeroC && !Result.C.isZero())
@@ -177,15 +175,17 @@ llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred,
     Result.X = X;
     Result.Mask = Result.Mask.zext(X->getType()->getScalarSizeInBits());
     Result.C = Result.C.zext(X->getType()->getScalarSizeInBits());
-  } else {
+  } else if (!Result.X) {
     Result.X = LHS;
   }
 
   return Result;
 }
 
-std::optional<DecomposedBitTest>
-llvm::decomposeBitTest(Value *Cond, bool LookThruTrunc, bool AllowNonZeroC) {
+std::optional<DecomposedBitTest> llvm::decomposeBitTest(Value *Cond,
+                                                        bool LookThruTrunc,
+                                                        bool AllowNonZeroC,
+                                                        bool DecomposeBitMask) {
   using namespace PatternMatch;
   if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) {
     // Don't allow pointers. Splat vectors are fine.
@@ -193,7 +193,7 @@ llvm::decomposeBitTest(Value *Cond, bool LookThruTrunc, bool AllowNonZeroC) {
       return std::nullopt;
     return decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
                                 ICmp->getPredicate(), LookThruTrunc,
-                                AllowNonZeroC);
+                                AllowNonZeroC, DecomposeBitMask);
   }
   Value *X;
   if (Cond->getType()->isIntOrIntVectorTy(1) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a550099be5459..8cfd2f5bfc2e6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -180,9 +180,11 @@ static unsigned conjugateICmpMask(unsigned Mask) {
 
 // Adapts the external decomposeBitTestICmp for local use.
 static bool decomposeBitTestICmp(Value *Cond, CmpInst::Predicate &Pred,
-                                 Value *&X, Value *&Y, Value *&Z) {
+                                 Value *&X, Value *&Y, Value *&Z,
+                                 bool DecomposeBitMask = false) {
   auto Res = llvm::decomposeBitTest(Cond, /*LookThroughTrunc=*/true,
-                                    /*AllowNonZeroC=*/true);
+                                    /*AllowNonZeroC=*/true,
+                                    /*DecomposeBitMask=*/DecomposeBitMask);
   if (!Res)
     return false;
 
@@ -3655,41 +3657,37 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
               m_Select(m_Value(Cond1), m_APInt(Op1Eq), m_APInt(Op1Ne)))) {
       CmpPredicate Pred0, Pred1;
 
-      if (ICmpInst *ICL = dyn_cast<ICmpInst>(Cond0);
-          ICmpInst *ICR = dyn_cast<ICmpInst>(Cond1)) {
-        auto LHSDecompose =
-            decomposeBitTestICmp(ICL->getOperand(0), ICL->getOperand(1),
-                                 ICL->getPredicate(), true, false, true);
-        auto RHSDecompose =
-            decomposeBitTestICmp(ICR->getOperand(0), ICR->getOperand(1),
-                                 ICR->getPredicate(), true, false, true);
-        if (LHSDecompose && RHSDecompose &&
-            LHSDecompose->X == RHSDecompose->X &&
-            LHSDecompose->Pred == RHSDecompose->Pred &&
-            (ICmpInst::isEquality(LHSDecompose->Pred)) &&
-            !RHSDecompose->Mask.isNegative() &&
-            !LHSDecompose->Mask.isNegative() &&
-            RHSDecompose->Mask.isPowerOf2() &&
-            LHSDecompose->Mask.isPowerOf2() &&
-            LHSDecompose->Mask != RHSDecompose->Mask) {
-          if (LHSDecompose->Pred == ICmpInst::ICMP_NE) {
-            std::swap(Op0Eq, Op0Ne);
-            std::swap(Op1Eq, Op1Ne);
-          }
-
-          if (!Op0Ne->isNegative() && !Op1Ne->isNegative() && Op0Eq->isZero() &&
-              Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
-              Op1Ne->urem(RHSDecompose->Mask).isZero() &&
-              Op0Ne->udiv(LHSDecompose->Mask) ==
-                  Op1Ne->udiv(RHSDecompose->Mask)) {
-            auto NewAnd = Builder.CreateAnd(
-                LHSDecompose->X,
-                ConstantInt::get(LHSDecompose->X->getType(),
-                                 (LHSDecompose->Mask + RHSDecompose->Mask)));
-            return BinaryOperator::CreateMul(
-                NewAnd, ConstantInt::get(NewAnd->getType(),
-                                         Op0Ne->udiv(LHSDecompose->Mask)));
-          }
+      auto LHSDecompose =
+          decomposeBitTest(Cond0, /*LookThruTrunc=*/true,
+                           /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
+      auto RHSDecompose =
+          decomposeBitTest(Cond1, /*LookThruTrunc=*/true,
+                           /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
+
+      if (LHSDecompose && RHSDecompose && LHSDecompose->X == RHSDecompose->X &&
+          LHSDecompose->Pred == RHSDecompose->Pred &&
+          (ICmpInst::isEquality(LHSDecompose->Pred)) &&
+          !RHSDecompose->Mask.isNegative() &&
+          !LHSDecompose->Mask.isNegative() && RHSDecompose->Mask.isPowerOf2() &&
+          LHSDecompose->Mask.isPowerOf2() &&
+          LHSDecompose->Mask != RHSDecompose->Mask) {
+        if (LHSDecompose->Pred == ICmpInst::ICMP_NE) {
+          std::swap(Op0Eq, Op0Ne);
+          std::swap(Op1Eq, Op1Ne);
+        }
+        if (!Op0Ne->isNegative() && !Op1Ne->isNegative() && Op0Eq->isZero() &&
+            Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
+            Op1Ne->urem(RHSDecompose->Mask).isZero() &&
+            Op0Ne->udiv(LHSDecompose->Mask) ==
+                Op1Ne->udiv(RHSDecompose->Mask)) {
+          auto NewAnd = Builder.CreateAnd(
+              LHSDecompose->X,
+              ConstantInt::get(LHSDecompose->X->getType(),
+                               (LHSDecompose->Mask + RHSDecompose->Mask)));
+
+          return BinaryOperator::CreateMul(
+              NewAnd, ConstantInt::get(NewAnd->getType(),
+                                       Op0Ne->udiv(LHSDecompose->Mask)));
         }
       }
     }
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 02548ac9e7883..e8514590a6618 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2095,11 +2095,11 @@ define i32 @add_select_cmp_and3(i32 %in) {
 define i32 @add_select_cmp_and4(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
-; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
-; CHECK-NEXT:    ret i32 [[OUT]]
+; CHECK-NEXT:    [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
+; CHECK-NEXT:    ret i32 [[OUT1]]
 ;
   %bitop0 = and i32 %in, 1
   %cmp0 = icmp eq i32 %bitop0, 0
@@ -2119,6 +2119,45 @@ define i32 @add_select_cmp_and4(i32 %in) {
   ret i32 %out
 }
 
+define i32 @add_select_cmp_trunc(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_trunc(
+; CHECK-NEXT:    [[CMP0:%.*]] = trunc i32 [[IN:%.*]] to i1
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %cmp0 = trunc i32 %in to i1
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_trunc1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_trunc1(
+; CHECK-NEXT:    [[CMP0:%.*]] = trunc i32 [[IN:%.*]] to i1
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1_NOT]], i32 0, i32 144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %cmp0 = trunc i32 %in to i1
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp ne i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 144, i32 0
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+
 define i32 @add_select_cmp_and_const_mismatch(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and_const_mismatch(
 ; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1

>From d16418ea3d9c211d95a24c63392c18a4ed605f5d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 16 Apr 2025 16:06:36 -0700
Subject: [PATCH 6/7] Support mismatched preds + move tests

Change-Id: I769481b102dc943039b2bd07a815f2716e58fed4
---
 .../InstCombine/InstCombineAndOrXor.cpp       |   9 +-
 .../test/Transforms/InstCombine/or-bitmask.ll | 328 ++++++++++++++++++
 llvm/test/Transforms/InstCombine/or.ll        | 282 ---------------
 3 files changed, 333 insertions(+), 286 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/or-bitmask.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8cfd2f5bfc2e6..3bf0aec6d4e51 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3665,16 +3665,17 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                            /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
 
       if (LHSDecompose && RHSDecompose && LHSDecompose->X == RHSDecompose->X &&
-          LHSDecompose->Pred == RHSDecompose->Pred &&
           (ICmpInst::isEquality(LHSDecompose->Pred)) &&
           !RHSDecompose->Mask.isNegative() &&
           !LHSDecompose->Mask.isNegative() && RHSDecompose->Mask.isPowerOf2() &&
           LHSDecompose->Mask.isPowerOf2() &&
-          LHSDecompose->Mask != RHSDecompose->Mask) {
-        if (LHSDecompose->Pred == ICmpInst::ICMP_NE) {
+          LHSDecompose->Mask != RHSDecompose->Mask &&
+          LHSDecompose->C.isZero() && RHSDecompose->C.isZero()) {
+        if (LHSDecompose->Pred == ICmpInst::ICMP_NE)
           std::swap(Op0Eq, Op0Ne);
+        if (RHSDecompose->Pred == ICmpInst::ICMP_NE)
           std::swap(Op1Eq, Op1Ne);
-        }
+
         if (!Op0Ne->isNegative() && !Op1Ne->isNegative() && Op0Eq->isZero() &&
             Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
             Op1Ne->urem(RHSDecompose->Mask).isZero() &&
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
new file mode 100644
index 0000000000000..2c020fd9456e1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -0,0 +1,328 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,CONSTVEC
+; RUN: opt < %s -passes=instcombine -S -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=CHECK,CONSTSPLAT
+
+define i32 @add_select_cmp_and1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 5
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 4
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 288
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and3(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[BITOP2:%.*]] = and i32 [[IN]], 4
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %temp = or disjoint i32 %sel0, %sel1
+  %bitop2 = and i32 %in, 4
+  %cmp2 = icmp eq i32 %bitop2, 0
+  %sel2 = select i1 %cmp2, i32 0, i32 288
+  %out = or disjoint i32 %temp, %sel2
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and4(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT:    [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
+; CHECK-NEXT:    ret i32 [[OUT1]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %temp = or disjoint i32 %sel0, %sel1
+  %bitop2 = and i32 %in, 4
+  %cmp2 = icmp eq i32 %bitop2, 0
+  %bitop3 = and i32 %in, 8
+  %cmp3 = icmp eq i32 %bitop3, 0
+  %sel2 = select i1 %cmp2, i32 0, i32 288
+  %sel3 = select i1 %cmp3, i32 0, i32 576
+  %temp2 = or disjoint i32 %sel2, %sel3
+  %out = or disjoint i32 %temp, %temp2
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_pred1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_pred1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp ne i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 72, i32 0
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_pred2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_pred2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp ne i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 144, i32 0
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_pred3(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_pred3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp ne i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp ne i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 72, i32 0
+  %sel1 = select i1 %cmp1, i32 144, i32 0
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_trunc(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_trunc(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %cmp0 = trunc i32 %in to i1
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 72, i32 0
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_trunc1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_trunc1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %cmp0 = trunc i32 %in to i1
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp ne i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 72, i32 0
+  %sel1 = select i1 %cmp1, i32 144, i32 0
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+
+define i32 @add_select_cmp_and_const_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_const_mismatch(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 288
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 288
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_value_mismatch(i32 %in, i32 %in1) {
+; CHECK-LABEL: @add_select_cmp_and_value_mismatch(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN1:%.*]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in1, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_negative(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_negative(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[IN]], 2
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 -144
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, -2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 -144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_bitsel_overlap(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_bitsel_overlap(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 2
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 144
+; CHECK-NEXT:    ret i32 [[SEL0]]
+;
+  %bitop0 = and i32 %in, 2
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 144
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+; We cannot combine into and-mul, as %bitop1 may not be exactly 6
+
+define i32 @add_select_cmp_and_multbit_mask(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_multbit_mask(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 6
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 432
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %bitop1 = and i32 %in, 6
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = select i1 %cmp1, i32 0, i32 432
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+
+define <2 x i32> @add_select_cmp_vec(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw <2 x i32> [[TMP1]], splat (i32 72)
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 72>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 144, i32 144>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}
+
+define <2 x i32> @add_select_cmp_vec_poison(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec_poison(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
+; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], splat (i32 2)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
+; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 poison, i32 144>
+; CHECK-NEXT:    [[OUT:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> [[SEL1]], <2 x i32> <i32 72, i32 poison>
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 poison>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 poison, i32 144>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}
+
+define <2 x i32> @add_select_cmp_vec_nonunique(<2 x i32> %in) {
+; CHECK-LABEL: @add_select_cmp_vec_nonunique(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], <i32 1, i32 2>
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
+; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], <i32 4, i32 8>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
+; CHECK-NEXT:    [[SEL0:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> zeroinitializer, <2 x i32> <i32 72, i32 144>
+; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 288, i32 576>
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint <2 x i32> [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret <2 x i32> [[OUT]]
+;
+  %bitop0 = and <2 x i32> %in, <i32 1, i32 2>
+  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
+  %bitop1 = and <2 x i32> %in, <i32 4, i32 8>
+  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
+  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 144>
+  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 288, i32 576>
+  %out = or disjoint <2 x i32> %sel0, %sel1
+  ret <2 x i32> %out
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CONSTSPLAT: {{.*}}
+; CONSTVEC: {{.*}}
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index e8514590a6618..6887400e7770f 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2035,285 +2035,3 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) {
   %or1 = or i32 %xor, %yy
   ret i32 %or1
 }
-
-define i32 @add_select_cmp_and1(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and2(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and2(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 5
-; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 4
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 288
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and3(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and3(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[BITOP2:%.*]] = and i32 [[IN]], 4
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
-; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %temp = or disjoint i32 %sel0, %sel1
-  %bitop2 = and i32 %in, 4
-  %cmp2 = icmp eq i32 %bitop2, 0
-  %sel2 = select i1 %cmp2, i32 0, i32 288
-  %out = or disjoint i32 %temp, %sel2
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and4(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and4(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
-; CHECK-NEXT:    [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
-; CHECK-NEXT:    ret i32 [[OUT1]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %temp = or disjoint i32 %sel0, %sel1
-  %bitop2 = and i32 %in, 4
-  %cmp2 = icmp eq i32 %bitop2, 0
-  %bitop3 = and i32 %in, 8
-  %cmp3 = icmp eq i32 %bitop3, 0
-  %sel2 = select i1 %cmp2, i32 0, i32 288
-  %sel3 = select i1 %cmp3, i32 0, i32 576
-  %temp2 = or disjoint i32 %sel2, %sel3
-  %out = or disjoint i32 %temp, %temp2
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_trunc(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_trunc(
-; CHECK-NEXT:    [[CMP0:%.*]] = trunc i32 [[IN:%.*]] to i1
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %cmp0 = trunc i32 %in to i1
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_trunc1(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_trunc1(
-; CHECK-NEXT:    [[CMP0:%.*]] = trunc i32 [[IN:%.*]] to i1
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
-; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[BITOP1]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1_NOT]], i32 0, i32 144
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %cmp0 = trunc i32 %in to i1
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp ne i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 144, i32 0
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-
-define i32 @add_select_cmp_and_const_mismatch(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and_const_mismatch(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 288
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 288
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and_value_mismatch(i32 %in, i32 %in1) {
-; CHECK-LABEL: @add_select_cmp_and_value_mismatch(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN1:%.*]], 2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in1, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and_negative(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and_negative(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[IN]], 2
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 -144
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, -2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 -144
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-define i32 @add_select_cmp_and_bitsel_overlap(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and_bitsel_overlap(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 2
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 144
-; CHECK-NEXT:    ret i32 [[SEL0]]
-;
-  %bitop0 = and i32 %in, 2
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 2
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 144
-  %sel1 = select i1 %cmp1, i32 0, i32 144
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-; We cannot combine into and-mul, as %bitop1 may not be exactly 6
-
-define i32 @add_select_cmp_and_multbit_mask(i32 %in) {
-; CHECK-LABEL: @add_select_cmp_and_multbit_mask(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
-; CHECK-NEXT:    [[BITOP1:%.*]] = and i32 [[IN]], 6
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[BITOP1]], 0
-; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 432
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret i32 [[OUT]]
-;
-  %bitop0 = and i32 %in, 1
-  %cmp0 = icmp eq i32 %bitop0, 0
-  %bitop1 = and i32 %in, 6
-  %cmp1 = icmp eq i32 %bitop1, 0
-  %sel0 = select i1 %cmp0, i32 0, i32 72
-  %sel1 = select i1 %cmp1, i32 0, i32 432
-  %out = or disjoint i32 %sel0, %sel1
-  ret i32 %out
-}
-
-
-define <2 x i32> @add_select_cmp_vec(<2 x i32> %in) {
-; CHECK-LABEL: @add_select_cmp_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 3)
-; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw <2 x i32> [[TMP1]], splat (i32 72)
-; CHECK-NEXT:    ret <2 x i32> [[OUT]]
-;
-  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
-  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
-  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
-  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
-  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 72>
-  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 144, i32 144>
-  %out = or disjoint <2 x i32> %sel0, %sel1
-  ret <2 x i32> %out
-}
-
-define <2 x i32> @add_select_cmp_vec_poison(<2 x i32> %in) {
-; CHECK-LABEL: @add_select_cmp_vec_poison(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], splat (i32 1)
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
-; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], splat (i32 2)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
-; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 poison, i32 144>
-; CHECK-NEXT:    [[OUT:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> [[SEL1]], <2 x i32> <i32 72, i32 poison>
-; CHECK-NEXT:    ret <2 x i32> [[OUT]]
-;
-  %bitop0 = and <2 x i32> %in, <i32 1, i32 1>
-  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
-  %bitop1 = and <2 x i32> %in, <i32 2, i32 2>
-  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
-  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 poison>
-  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 poison, i32 144>
-  %out = or disjoint <2 x i32> %sel0, %sel1
-  ret <2 x i32> %out
-}
-
-define <2 x i32> @add_select_cmp_vec_nonunique(<2 x i32> %in) {
-; CHECK-LABEL: @add_select_cmp_vec_nonunique(
-; CHECK-NEXT:    [[BITOP0:%.*]] = and <2 x i32> [[IN:%.*]], <i32 1, i32 2>
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i32> [[BITOP0]], zeroinitializer
-; CHECK-NEXT:    [[BITOP1:%.*]] = and <2 x i32> [[IN]], <i32 4, i32 8>
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <2 x i32> [[BITOP1]], zeroinitializer
-; CHECK-NEXT:    [[SEL0:%.*]] = select <2 x i1> [[CMP0]], <2 x i32> zeroinitializer, <2 x i32> <i32 72, i32 144>
-; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> <i32 288, i32 576>
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint <2 x i32> [[SEL0]], [[SEL1]]
-; CHECK-NEXT:    ret <2 x i32> [[OUT]]
-;
-  %bitop0 = and <2 x i32> %in, <i32 1, i32 2>
-  %cmp0 = icmp eq <2 x i32> %bitop0, <i32 0, i32 0>
-  %bitop1 = and <2 x i32> %in, <i32 4, i32 8>
-  %cmp1 = icmp eq <2 x i32> %bitop1, <i32 0, i32 0>
-  %sel0 = select <2 x i1> %cmp0, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 72, i32 144>
-  %sel1 = select <2 x i1> %cmp1, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 288, i32 576>
-  %out = or disjoint <2 x i32> %sel0, %sel1
-  ret <2 x i32> %out
-}

>From c74714cf3af7c8cdde937d4c9c2df54144463838 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 17 Apr 2025 10:11:18 -0700
Subject: [PATCH 7/7] [InstCombine] Extend bitmask->select combine to match
 and->mul

Change-Id: I1cc2acd3804dde50636518f3ef2c9581848ae9f6
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 123 +++++++++++-------
 .../test/Transforms/InstCombine/or-bitmask.ll |  96 ++++++++++++--
 2 files changed, 164 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3bf0aec6d4e51..b53ca44806b75 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3564,6 +3564,72 @@ static Value *foldOrOfInversions(BinaryOperator &I,
   return nullptr;
 }
 
+struct DecomposedBitMaskMul {
+  Value *X;
+  APInt Factor;
+  APInt Mask;
+};
+
+static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
+  Instruction *Op = dyn_cast<Instruction>(V);
+  if (!Op)
+    return std::nullopt;
+
+  Value *MulOp = nullptr;
+  const APInt *MulConst = nullptr;
+  if (match(Op, m_Mul(m_Value(MulOp), m_APInt(MulConst)))) {
+    Value *Original = nullptr;
+    const APInt *Mask = nullptr;
+    if (!MulConst->isStrictlyPositive())
+      return std::nullopt;
+
+    if (match(MulOp, m_And(m_Value(Original), m_APInt(Mask)))) {
+      if (!Mask->isStrictlyPositive())
+        return std::nullopt;
+      DecomposedBitMaskMul Ret;
+      Ret.X = Original;
+      Ret.Mask = *Mask;
+      Ret.Factor = *MulConst;
+      return Ret;
+    }
+    return std::nullopt;
+  }
+
+  Value *Cond = nullptr;
+  const APInt *EqZero = nullptr, *NeZero = nullptr;
+
+  //  (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
+  if (match(Op, m_Select(m_Value(Cond), m_APInt(EqZero), m_APInt(NeZero)))) {
+    auto ICmpDecompose =
+        decomposeBitTest(Cond, /*LookThruTrunc=*/true,
+                         /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
+    if (!ICmpDecompose.has_value())
+      return std::nullopt;
+
+    if (ICmpDecompose->Pred == ICmpInst::ICMP_NE)
+      std::swap(EqZero, NeZero);
+
+    if (!EqZero->isZero() || !NeZero->isStrictlyPositive())
+      return std::nullopt;
+
+    if (!ICmpInst::isEquality(ICmpDecompose->Pred) ||
+        !ICmpDecompose->C.isZero() || !ICmpDecompose->Mask.isPowerOf2() ||
+        ICmpDecompose->Mask.isNegative())
+      return std::nullopt;
+
+    if (!NeZero->urem(ICmpDecompose->Mask).isZero())
+      return std::nullopt;
+
+    DecomposedBitMaskMul Ret;
+    Ret.X = ICmpDecompose->X;
+    Ret.Mask = ICmpDecompose->Mask;
+    Ret.Factor = NeZero->udiv(ICmpDecompose->Mask);
+    return Ret;
+  }
+
+  return std::nullopt;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -3646,50 +3712,19 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                                    /*NSW=*/true, /*NUW=*/true))
       return R;
 
-    Value *Cond0 = nullptr, *Cond1 = nullptr;
-    const APInt *Op0Eq = nullptr, *Op0Ne = nullptr;
-    const APInt *Op1Eq = nullptr, *Op1Ne = nullptr;
-
-    //  (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
-    if (match(I.getOperand(0),
-              m_Select(m_Value(Cond0), m_APInt(Op0Eq), m_APInt(Op0Ne))) &&
-        match(I.getOperand(1),
-              m_Select(m_Value(Cond1), m_APInt(Op1Eq), m_APInt(Op1Ne)))) {
-      CmpPredicate Pred0, Pred1;
-
-      auto LHSDecompose =
-          decomposeBitTest(Cond0, /*LookThruTrunc=*/true,
-                           /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
-      auto RHSDecompose =
-          decomposeBitTest(Cond1, /*LookThruTrunc=*/true,
-                           /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
-
-      if (LHSDecompose && RHSDecompose && LHSDecompose->X == RHSDecompose->X &&
-          (ICmpInst::isEquality(LHSDecompose->Pred)) &&
-          !RHSDecompose->Mask.isNegative() &&
-          !LHSDecompose->Mask.isNegative() && RHSDecompose->Mask.isPowerOf2() &&
-          LHSDecompose->Mask.isPowerOf2() &&
-          LHSDecompose->Mask != RHSDecompose->Mask &&
-          LHSDecompose->C.isZero() && RHSDecompose->C.isZero()) {
-        if (LHSDecompose->Pred == ICmpInst::ICMP_NE)
-          std::swap(Op0Eq, Op0Ne);
-        if (RHSDecompose->Pred == ICmpInst::ICMP_NE)
-          std::swap(Op1Eq, Op1Ne);
-
-        if (!Op0Ne->isNegative() && !Op1Ne->isNegative() && Op0Eq->isZero() &&
-            Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
-            Op1Ne->urem(RHSDecompose->Mask).isZero() &&
-            Op0Ne->udiv(LHSDecompose->Mask) ==
-                Op1Ne->udiv(RHSDecompose->Mask)) {
-          auto NewAnd = Builder.CreateAnd(
-              LHSDecompose->X,
-              ConstantInt::get(LHSDecompose->X->getType(),
-                               (LHSDecompose->Mask + RHSDecompose->Mask)));
-
-          return BinaryOperator::CreateMul(
-              NewAnd, ConstantInt::get(NewAnd->getType(),
-                                       Op0Ne->udiv(LHSDecompose->Mask)));
-        }
+    auto Decomp0 = matchBitmaskMul(I.getOperand(0));
+    auto Decomp1 = matchBitmaskMul(I.getOperand(1));
+
+    if (Decomp0 && Decomp1) {
+      if (Decomp0->X == Decomp1->X &&
+          (Decomp0->Mask & Decomp1->Mask).isZero() &&
+          Decomp0->Factor == Decomp1->Factor) {
+        auto NewAnd = Builder.CreateAnd(
+            Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
+                                         (Decomp0->Mask + Decomp1->Mask)));
+
+        return BinaryOperator::CreateMul(
+            NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor));
       }
     }
   }
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 2c020fd9456e1..d5384a26c973a 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -36,13 +36,9 @@ define i32 @add_select_cmp_and2(i32 %in) {
 
 define i32 @add_select_cmp_and3(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and3(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 7
 ; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[BITOP2:%.*]] = and i32 [[IN]], 4
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
-; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
-; CHECK-NEXT:    ret i32 [[OUT]]
+; CHECK-NEXT:    ret i32 [[TEMP]]
 ;
   %bitop0 = and i32 %in, 1
   %cmp0 = icmp eq i32 %bitop0, 0
@@ -60,12 +56,9 @@ define i32 @add_select_cmp_and3(i32 %in) {
 
 define i32 @add_select_cmp_and4(i32 %in) {
 ; CHECK-LABEL: @add_select_cmp_and4(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 15
 ; CHECK-NEXT:    [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
-; CHECK-NEXT:    ret i32 [[OUT1]]
+; CHECK-NEXT:    ret i32 [[TEMP3]]
 ;
   %bitop0 = and i32 %in, 1
   %cmp0 = icmp eq i32 %bitop0, 0
@@ -323,6 +316,87 @@ define <2 x i32> @add_select_cmp_vec_nonunique(<2 x i32> %in) {
   %out = or disjoint <2 x i32> %sel0, %sel1
   ret <2 x i32> %out
 }
+
+define i32 @add_select_cmp_mixed1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %mask = and i32 %in, 1
+  %sel0 = mul i32 %mask, 72
+  %bitop1 = and i32 %in, 2
+  %cmp1 = icmp eq i32 %bitop1, 0
+  %sel1 = select i1 %cmp1, i32 0, i32 144
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_mixed2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %mask = and i32 %in, 2
+  %sel0 = select i1 %cmp0, i32 0, i32 72
+  %sel1 = mul i32 %mask, 72
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_mul(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %mask0 = and i32 %in, 1
+  %sel0 = mul i32 %mask0, 72
+  %mask1 = and i32 %in, 2
+  %sel1 = mul i32 %mask1, 72
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_mixed2_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed2_mismatch(
+; CHECK-NEXT:    [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 73
+; CHECK-NEXT:    [[SEL1:%.*]] = mul nuw nsw i32 [[MASK]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %bitop0 = and i32 %in, 1
+  %cmp0 = icmp eq i32 %bitop0, 0
+  %mask = and i32 %in, 2
+  %sel0 = select i1 %cmp0, i32 0, i32 73
+  %sel1 = mul i32 %mask, 72
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
+define i32 @add_select_cmp_and_mul_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mul_mismatch(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[IN:%.*]] to i1
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[TMP1]], i32 73, i32 0
+; CHECK-NEXT:    [[MASK1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT:    [[SEL1:%.*]] = mul nuw nsw i32 [[MASK1]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %mask0 = and i32 %in, 1
+  %sel0 = mul i32 %mask0, 73
+  %mask1 = and i32 %in, 2
+  %sel1 = mul i32 %mask1, 72
+  %out = or disjoint i32 %sel0, %sel1
+  ret i32 %out
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CONSTSPLAT: {{.*}}
 ; CONSTVEC: {{.*}}