[llvm] [InstCombine] Extend bitmask mul combine to handle independent operands (PR #142503)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 27 10:02:57 PDT 2025


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/142503

>From e9996d1d98980da1b1dea67f3fa5d5aef760570a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 2 Jun 2025 12:29:39 -0700
Subject: [PATCH 1/8] [InstCombine] Extend bitmask mul combine to handle
 independent operands

Change-Id: Ife1a010d2ae6df40549a6c73f7b893948befa3be
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 92 +++++++++++++++----
 .../test/Transforms/InstCombine/or-bitmask.ll | 50 ++++++++++
 2 files changed, 123 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dce695a036006..099359021a394 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3602,6 +3602,11 @@ struct DecomposedBitMaskMul {
   APInt Mask;
   bool NUW;
   bool NSW;
+
+  bool isCombineableWith(DecomposedBitMaskMul Other) {
+    return X == Other.X && (Mask & Other.Mask).isZero() &&
+           Factor == Other.Factor;
+  }
 };
 
 static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
@@ -3659,6 +3664,34 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
   return std::nullopt;
 }
 
+using CombinedBitmaskMul =
+    std::pair<std::optional<DecomposedBitMaskMul>, Value *>;
+
+static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
+  auto DecompBitMaskMul = matchBitmaskMul(V);
+  if (DecompBitMaskMul)
+    return {DecompBitMaskMul, nullptr};
+
+  // Otherwise, check the operands of V for bitmaskmul pattern
+  auto BOp = dyn_cast<BinaryOperator>(V);
+  if (!BOp)
+    return {std::nullopt, nullptr};
+
+  auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
+  if (!Disj || !Disj->isDisjoint())
+    return {std::nullopt, nullptr};
+
+  auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
+  if (DecompBitMaskMul0)
+    return {DecompBitMaskMul0, BOp->getOperand(1)};
+
+  auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
+  if (DecompBitMaskMul1)
+    return {DecompBitMaskMul1, BOp->getOperand(0)};
+
+  return {std::nullopt, nullptr};
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -3741,25 +3774,46 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                                    /*NSW=*/true, /*NUW=*/true))
       return R;
 
-    // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
-    // This also accepts the equivalent select form of (A & N) * C
-    // expressions i.e. !(A & N) ? 0 : N * C)
-    auto Decomp1 = matchBitmaskMul(I.getOperand(1));
-    if (Decomp1) {
-      auto Decomp0 = matchBitmaskMul(I.getOperand(0));
-      if (Decomp0 && Decomp0->X == Decomp1->X &&
-          (Decomp0->Mask & Decomp1->Mask).isZero() &&
-          Decomp0->Factor == Decomp1->Factor) {
-
-        Value *NewAnd = Builder.CreateAnd(
-            Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
-                                         (Decomp0->Mask + Decomp1->Mask)));
-
-        auto *Combined = BinaryOperator::CreateMul(
-            NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor));
-
-        Combined->setHasNoUnsignedWrap(Decomp0->NUW && Decomp1->NUW);
-        Combined->setHasNoSignedWrap(Decomp0->NSW && Decomp1->NSW);
+    // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
+    // This also accepts the equivalent mul form of (A & N) ? 0 : N * C)
+    // expressions i.e. (A & N) * C
+    CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
+    auto BMDecomp1 = Decomp1.first;
+
+    if (BMDecomp1) {
+      CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
+      auto BMDecomp0 = Decomp0.first;
+
+      if (BMDecomp0 && BMDecomp0->isCombineableWith(*BMDecomp1)) {
+        auto NewAnd = Builder.CreateAnd(
+            BMDecomp0->X,
+            ConstantInt::get(BMDecomp0->X->getType(),
+                             (BMDecomp0->Mask + BMDecomp1->Mask)));
+
+        BinaryOperator *Combined = cast<BinaryOperator>(Builder.CreateMul(
+            NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor)));
+
+        Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
+        Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+
+        // If our tree has indepdent or-disjoint operands, bring them in.
+        auto OtherOp0 = Decomp0.second;
+        auto OtherOp1 = Decomp1.second;
+
+        if (OtherOp0 || OtherOp1) {
+          Value *OtherOp;
+          if (OtherOp0 && OtherOp1) {
+            OtherOp = Builder.CreateOr(OtherOp0, OtherOp1);
+            cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
+          } else {
+            OtherOp = OtherOp0 ? OtherOp0 : OtherOp1;
+          }
+          Combined = cast<BinaryOperator>(Builder.CreateOr(Combined, OtherOp));
+          cast<PossiblyDisjointInst>(Combined)->setIsDisjoint(true);
+        }
+
+        // Caller expects detached instruction
+        Combined->removeFromParent();
         return Combined;
       }
     }
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 3c992dfea569a..0976b76542f49 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -451,6 +451,56 @@ define i32 @and_mul_non_disjoint(i32 %in) {
   ret i32 %out
 }
 
+define i32 @unrelated_ops(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp3 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp, %temp3
+  ret i32 %out
+}
+
+define i32 @unrelated_ops1(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp3 = or disjoint i32 %in2, %temp
+  %out = or disjoint i32 %temp3, %temp2
+  ret i32 %out
+}
+
+define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CONSTSPLAT: {{.*}}
 ; CONSTVEC: {{.*}}

>From 171ca6bf1544359c8e55056e2e0f21d71bb7b6ea Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 12 Jun 2025 08:50:01 -0700
Subject: [PATCH 2/8] Fix comment from bad merge

Change-Id: I879acdf0b17a7110286c6c375410300611c468eb
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 099359021a394..c6c0a85b06bdd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3774,9 +3774,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                                    /*NSW=*/true, /*NUW=*/true))
       return R;
 
-    // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
-    // This also accepts the equivalent mul form of (A & N) ? 0 : N * C)
-    // expressions i.e. (A & N) * C
+    // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
+    // This also accepts the equivalent select form of (A & N) * C
+    // expressions i.e. !(A & N) ? 0 : N * C)
     CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
     auto BMDecomp1 = Decomp1.first;
 

>From f46aaa715f288d3af964040146f0b7c93c69f52c Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 23 Jun 2025 11:16:10 -0700
Subject: [PATCH 3/8] Reassociate instead of combine

Change-Id: Ib86e8ed347ef60948c3e4cb44c5fab1c3667afc6
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 76 ++++++++++---------
 .../test/Transforms/InstCombine/or-bitmask.ll | 59 +++++++++++++-
 2 files changed, 98 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c6c0a85b06bdd..fde7014d7c246 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3664,32 +3664,35 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
   return std::nullopt;
 }
 
-using CombinedBitmaskMul =
-    std::pair<std::optional<DecomposedBitMaskMul>, Value *>;
+struct CombinedBitmaskMul {
+  std::optional<DecomposedBitMaskMul> Decomp = std::nullopt;
+  Value *DecompOp = nullptr;
+  Value *OtherOp = nullptr;
+};
 
 static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
   auto DecompBitMaskMul = matchBitmaskMul(V);
   if (DecompBitMaskMul)
-    return {DecompBitMaskMul, nullptr};
+    return {DecompBitMaskMul, V, nullptr};
 
   // Otherwise, check the operands of V for bitmaskmul pattern
   auto BOp = dyn_cast<BinaryOperator>(V);
   if (!BOp)
-    return {std::nullopt, nullptr};
+    return CombinedBitmaskMul();
 
   auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
   if (!Disj || !Disj->isDisjoint())
-    return {std::nullopt, nullptr};
+    return CombinedBitmaskMul();
 
   auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
   if (DecompBitMaskMul0)
-    return {DecompBitMaskMul0, BOp->getOperand(1)};
+    return {DecompBitMaskMul0, BOp->getOperand(0), BOp->getOperand(1)};
 
   auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
   if (DecompBitMaskMul1)
-    return {DecompBitMaskMul1, BOp->getOperand(0)};
+    return {DecompBitMaskMul1, BOp->getOperand(1), BOp->getOperand(0)};
 
-  return {std::nullopt, nullptr};
+  return CombinedBitmaskMul();
 }
 
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -3778,43 +3781,44 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     // This also accepts the equivalent select form of (A & N) * C
     // expressions i.e. !(A & N) ? 0 : N * C)
     CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
-    auto BMDecomp1 = Decomp1.first;
+    auto BMDecomp1 = Decomp1.Decomp;
 
     if (BMDecomp1) {
       CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
-      auto BMDecomp0 = Decomp0.first;
-
-      if (BMDecomp0 && BMDecomp0->isCombineableWith(*BMDecomp1)) {
-        auto NewAnd = Builder.CreateAnd(
-            BMDecomp0->X,
-            ConstantInt::get(BMDecomp0->X->getType(),
-                             (BMDecomp0->Mask + BMDecomp1->Mask)));
-
-        BinaryOperator *Combined = cast<BinaryOperator>(Builder.CreateMul(
-            NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor)));
+      auto BMDecomp0 = Decomp0.Decomp;
 
-        Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
-        Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+      if (BMDecomp0) {
+        // If we have independent operands in the BitmaskMul chain, then just
+        // reassociate to encourage combining in future iterations.
+        if (Decomp0.OtherOp || Decomp1.OtherOp) {
+          Value *OtherOp = Decomp0.OtherOp ? Decomp0.OtherOp : Decomp1.OtherOp;
 
-        // If our tree has indepdent or-disjoint operands, bring them in.
-        auto OtherOp0 = Decomp0.second;
-        auto OtherOp1 = Decomp1.second;
-
-        if (OtherOp0 || OtherOp1) {
-          Value *OtherOp;
-          if (OtherOp0 && OtherOp1) {
-            OtherOp = Builder.CreateOr(OtherOp0, OtherOp1);
+          if (Decomp0.OtherOp && Decomp1.OtherOp) {
+            OtherOp = Builder.CreateOr(Decomp0.OtherOp, Decomp1.OtherOp);
             cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
-          } else {
-            OtherOp = OtherOp0 ? OtherOp0 : OtherOp1;
           }
-          Combined = cast<BinaryOperator>(Builder.CreateOr(Combined, OtherOp));
-          cast<PossiblyDisjointInst>(Combined)->setIsDisjoint(true);
+
+          auto CombinedOp =
+              Builder.CreateOr(Decomp0.DecompOp, Decomp1.DecompOp);
+          cast<PossiblyDisjointInst>(CombinedOp)->setIsDisjoint(true);
+
+          return BinaryOperator::CreateDisjointOr(CombinedOp, OtherOp);
         }
 
-        // Caller expects detached instruction
-        Combined->removeFromParent();
-        return Combined;
+        if (BMDecomp0->isCombineableWith(*BMDecomp1)) {
+          auto NewAnd = Builder.CreateAnd(
+              BMDecomp0->X,
+              ConstantInt::get(BMDecomp0->X->getType(),
+                               (BMDecomp0->Mask + BMDecomp1->Mask)));
+
+          auto *Combined = BinaryOperator::CreateMul(
+              NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor));
+
+          Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
+          Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+
+          return Combined;
+        }
       }
     }
   }
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 0976b76542f49..d3758f6c51963 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -485,9 +485,9 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
 
 define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops2(
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
 ; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
@@ -501,6 +501,63 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
   ret i32 %out
 }
 
+define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops_nocombine(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 7
+; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 7
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops_nocombine1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 36
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 36
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @no_chain(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @no_chain(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TEMP3:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[IN2:%.*]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %temp, %in3
+  %out = or disjoint i32 %temp3, %in2
+  ret i32 %out
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CONSTSPLAT: {{.*}}
 ; CONSTVEC: {{.*}}

>From eda45da8440531c9a6fde0f09fcbddb5d0fadc81 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 24 Jun 2025 09:07:33 -0700
Subject: [PATCH 4/8] Default constructor for std::optional

Change-Id: I950ee32ec053430fd51c7fd52645fe52e9e6ecff
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index fde7014d7c246..fe6cc37735a6d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3665,7 +3665,7 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
 }
 
 struct CombinedBitmaskMul {
-  std::optional<DecomposedBitMaskMul> Decomp = std::nullopt;
+  std::optional<DecomposedBitMaskMul> Decomp;
   Value *DecompOp = nullptr;
   Value *OtherOp = nullptr;
 };

>From 15fa10a55988170ddc54612a7bcd611cb1a70b61 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 25 Jun 2025 11:04:57 -0700
Subject: [PATCH 5/8] Add tests for the and-icmp-sel form

Change-Id: I875b9fac4749b3f391efce47f8d3b9e2004de8c2
---
 .../test/Transforms/InstCombine/or-bitmask.ll | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index d3758f6c51963..c59e598ba6daa 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -501,6 +501,64 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
   ret i32 %out
 }
 
+define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops3(
+; CHECK-NEXT:    [[TEMP3:%.*]] = or disjoint i32 [[TEMP:%.*]], [[IN3:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP2]], [[TEMP3]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 2
+  %cmp = icmp eq i32 %1, 0
+  %temp = select i1 %cmp, i32 0, i32 144
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops4(
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[IN2:%.*]], [[IN3:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 12
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %in2, %temp
+  %2 = and i32 %in, 2
+  %cmp = icmp eq i32 %2, 0
+  %temp2 = select i1 %cmp, i32 0, i32 144
+  %temp4 = or disjoint i32 %temp2, %in3
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops5(
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 2
+  %cmp = icmp eq i32 %1, 0
+  %temp = select i1 %cmp, i32 0, i32 144
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 4
+  %cmp2 = icmp eq i32 %2, 0
+  %temp2 = select i1 %cmp2, i32 0, i32 288
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
 define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops_nocombine(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3

>From 132a12bf120e2316b0ef5f8ca2b51bd3445c30ff Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 26 Jun 2025 14:20:25 -0700
Subject: [PATCH 6/8] Refactor for reassociation

Change-Id: Ie86d0e58f7fdb2c0489d3dee3a41ef3911f9477b
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 250 ++++++++++--------
 .../InstCombine/InstCombineInternal.h         |   5 +-
 .../test/Transforms/InstCombine/or-bitmask.ll |  24 +-
 3 files changed, 152 insertions(+), 127 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index fe6cc37735a6d..2dd3381c5b7ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2365,8 +2365,10 @@ static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp,
 /// number of and/or instructions might have to be created.
 Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
                                                  Instruction &I, bool IsAnd,
-                                                 bool RHSIsLogical) {
+                                                 bool RHSIsLogical,
+                                                 bool RHSIsDisjoint) {
   Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or;
+
   // LHS bop (X lop Y) --> (LHS bop X) lop Y
   // LHS bop (X bop Y) --> (LHS bop X) bop Y
   if (Value *Res = foldBooleanAndOr(LHS, X, I, IsAnd, /*IsLogical=*/false))
@@ -2377,6 +2379,40 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
   if (Value *Res = foldBooleanAndOr(LHS, Y, I, IsAnd, /*IsLogical=*/false))
     return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res)
                         : Builder.CreateBinOp(Opcode, X, Res);
+
+  if (RHSIsDisjoint && !IsAnd && cast<PossiblyDisjointInst>(&I)->isDisjoint()) {
+    if (Value *Res = foldDisjointOr(LHS, X, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+    if (Value *Res = foldDisjointOr(LHS, Y, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+    Value *X1, *Y1;
+    if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1))))) {
+      auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
+                                Value *Rem1) -> Value * {
+        if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
+          auto Disjoint =
+              cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
+          Disjoint->setIsDisjoint(true);
+          auto Disjoint2 =
+              cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
+          return cast<Value>(Disjoint2);
+        }
+        return nullptr;
+      };
+
+      if (Value *Res = TryFold(X, X1, Y, Y1))
+        return Res;
+
+      if (Value *Res = TryFold(X, Y1, Y, X1))
+        return Res;
+    }
+  }
   return nullptr;
 }
 
@@ -3542,55 +3578,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
 }
 
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
-Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
-                                          Instruction &I, bool IsAnd,
-                                          bool IsLogical) {
-  if (!LHS->getType()->isIntOrIntVectorTy(1))
-    return nullptr;
-
-  // handle (roughly):
-  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
-  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
-  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
-                                        SQ.getWithInstruction(&I)))
-    return V;
-
-  if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
-    if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
-      if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
-        return Res;
-
-  if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
-    if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
-      if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
-        return Res;
-
-  if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
-    return Res;
-
-  return nullptr;
-}
-
-static Value *foldOrOfInversions(BinaryOperator &I,
-                                 InstCombiner::BuilderTy &Builder) {
-  assert(I.getOpcode() == Instruction::Or &&
-         "Simplification only supports or at the moment.");
-
-  Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
-  if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
-      !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
-    return nullptr;
-
-  // Check if any two pairs of the and operations are inversions of each other.
-  if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
-    return Builder.CreateXor(Cmp1, Cmp4);
-  if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
-    return Builder.CreateXor(Cmp1, Cmp3);
-
-  return nullptr;
-}
 
 // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
 // track these properities for preservation. Note that we can decompose
@@ -3664,35 +3651,94 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
   return std::nullopt;
 }
 
-struct CombinedBitmaskMul {
-  std::optional<DecomposedBitMaskMul> Decomp;
-  Value *DecompOp = nullptr;
-  Value *OtherOp = nullptr;
-};
+// (A & N) * C + (A & M) * C -> (A & (N + M)) * C
+// This also accepts the equivalent select form of (A & N) * C
+// expressions, i.e. (!(A & N) ? 0 : N * C).
+static Value *foldBitmaskMul(Value *Op0, Value *Op1,
+                             InstCombiner::BuilderTy &Builder) {
+  auto Decomp1 = matchBitmaskMul(Op1);
 
-static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
-  auto DecompBitMaskMul = matchBitmaskMul(V);
-  if (DecompBitMaskMul)
-    return {DecompBitMaskMul, V, nullptr};
+  if (Decomp1) {
+    auto Decomp0 = matchBitmaskMul(Op0);
 
-  // Otherwise, check the operands of V for bitmaskmul pattern
-  auto BOp = dyn_cast<BinaryOperator>(V);
-  if (!BOp)
-    return CombinedBitmaskMul();
+    if (Decomp0) {
+      // Both operands decompose into a bitmask-mul. Merge them only when they
+      // share the same X and Factor and their masks do not overlap.
 
-  auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
-  if (!Disj || !Disj->isDisjoint())
-    return CombinedBitmaskMul();
+      if (Decomp0->isCombineableWith(*Decomp1)) {
+        auto NewAnd = Builder.CreateAnd(
+            Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
+                                         (Decomp0->Mask + Decomp1->Mask)));
 
-  auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
-  if (DecompBitMaskMul0)
-    return {DecompBitMaskMul0, BOp->getOperand(0), BOp->getOperand(1)};
+        auto Res = Builder.CreateMul(
+            NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor), "",
+            Decomp0->NUW && Decomp1->NUW, Decomp0->NSW && Decomp1->NSW);
+        return Res;
+      }
+    }
+  }
 
-  auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
-  if (DecompBitMaskMul1)
-    return {DecompBitMaskMul1, BOp->getOperand(1), BOp->getOperand(0)};
+  return nullptr;
+}
+
+/// Try to fold a disjoint `or` of \p LHS and \p RHS (currently only the
+/// bitmask-mul combine); returns nullptr when no fold applies.
+Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
+                                        Instruction &I) {
+  if (Value *V = foldBitmaskMul(LHS, RHS, Builder))
+    return V;
 
-  return CombinedBitmaskMul();
+  return nullptr;
+}
+
+/// If IsLogical is true, then the and/or is in select form and the transform
+/// must be poison-safe.
+Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
+                                          Instruction &I, bool IsAnd,
+                                          bool IsLogical) {
+  if (!LHS->getType()->isIntOrIntVectorTy(1))
+    return nullptr;
+
+  // handle (roughly):
+  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
+                                        SQ.getWithInstruction(&I)))
+    return V;
+
+  if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+    if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
+      if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
+        return Res;
+
+  if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+    if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
+      if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
+        return Res;
+
+  if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
+    return Res;
+
+  return nullptr;
+}
+
+static Value *foldOrOfInversions(BinaryOperator &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  assert(I.getOpcode() == Instruction::Or &&
+         "Simplification only supports or at the moment.");
+
+  Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
+  if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
+      !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
+    return nullptr;
+
+  // Check if any two pairs of the and operations are inversions of each other.
+  if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
+    return Builder.CreateXor(Cmp1, Cmp4);
+  if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
+    return Builder.CreateXor(Cmp1, Cmp3);
+
+  return nullptr;
 }
 
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -3777,48 +3823,24 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
                                    /*NSW=*/true, /*NUW=*/true))
       return R;
 
-    // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
-    // This also accepts the equivalent select form of (A & N) * C
-    // expressions i.e. !(A & N) ? 0 : N * C)
-    CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
-    auto BMDecomp1 = Decomp1.Decomp;
-
-    if (BMDecomp1) {
-      CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
-      auto BMDecomp0 = Decomp0.Decomp;
-
-      if (BMDecomp0) {
-        // If we have independent operands in the BitmaskMul chain, then just
-        // reassociate to encourage combining in future iterations.
-        if (Decomp0.OtherOp || Decomp1.OtherOp) {
-          Value *OtherOp = Decomp0.OtherOp ? Decomp0.OtherOp : Decomp1.OtherOp;
-
-          if (Decomp0.OtherOp && Decomp1.OtherOp) {
-            OtherOp = Builder.CreateOr(Decomp0.OtherOp, Decomp1.OtherOp);
-            cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
-          }
-
-          auto CombinedOp =
-              Builder.CreateOr(Decomp0.DecompOp, Decomp1.DecompOp);
-          cast<PossiblyDisjointInst>(CombinedOp)->setIsDisjoint(true);
-
-          return BinaryOperator::CreateDisjointOr(CombinedOp, OtherOp);
-        }
-
-        if (BMDecomp0->isCombineableWith(*BMDecomp1)) {
-          auto NewAnd = Builder.CreateAnd(
-              BMDecomp0->X,
-              ConstantInt::get(BMDecomp0->X->getType(),
-                               (BMDecomp0->Mask + BMDecomp1->Mask)));
-
-          auto *Combined = BinaryOperator::CreateMul(
-              NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor));
-
-          Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
-          Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+    if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
+      return replaceInstUsesWith(I, Res);
 
-          return Combined;
-        }
+    Value *X, *Y;
+    if (match(I.getOperand(1),
+              m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+      if (auto Res = reassociateBooleanAndOr(
+              I.getOperand(0), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
+              /*RHSIsDisjoint*/ true)) {
+        return replaceInstUsesWith(I, Res);
+      }
+    }
+    if (match(I.getOperand(0),
+              m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+      if (auto Res = reassociateBooleanAndOr(
+              I.getOperand(1), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
+              /*RHSIsDisjoint*/ true)) {
+        return replaceInstUsesWith(I, Res);
       }
     }
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index bf7689bbfde70..60fbcc4575ff9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -431,8 +431,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Value *foldBooleanAndOr(Value *LHS, Value *RHS, Instruction &I, bool IsAnd,
                           bool IsLogical);
 
+  Value *foldDisjointOr(Value *LHS, Value *RHS, Instruction &I);
+
   Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
-                                 bool IsAnd, bool RHSIsLogical);
+                                 bool IsAnd, bool RHSIsLogical,
+                                 bool RHSIsDisjoint = false);
 
   Instruction *
   canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index c59e598ba6daa..753ec3507fa4e 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -485,10 +485,10 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
 
 define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops2(
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
   %1 = and i32 %in, 3
@@ -503,10 +503,10 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
 
 define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops3(
-; CHECK-NEXT:    [[TEMP3:%.*]] = or disjoint i32 [[TEMP:%.*]], [[IN3:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
 ; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TEMP2]], [[TEMP3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP3]], [[TEMP2]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
   %1 = and i32 %in, 2
@@ -522,10 +522,10 @@ define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
 
 define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops4(
-; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[IN2:%.*]], [[IN3:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
   %1 = and i32 %in, 12
@@ -541,10 +541,10 @@ define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
 
 define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops5(
-; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
   %1 = and i32 %in, 2
@@ -563,10 +563,10 @@ define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops_nocombine(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
 ; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 7
 ; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP2]]
 ; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;
@@ -584,10 +584,10 @@ define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops_nocombine1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
 ; CHECK-NEXT:    [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[IN]], 12
 ; CHECK-NEXT:    [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 36
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP2]]
 ; CHECK-NEXT:    [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    ret i32 [[OUT]]
 ;

>From d32b91d15b6262cabdb9d83ed3ad82f6436fb004 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 26 Jun 2025 17:32:52 -0700
Subject: [PATCH 7/8] Formatting

Change-Id: I2c418b8e5bf7fed050ee77515a73fa4368a1ea7d
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2dd3381c5b7ec..8fb4c1e31fa5b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3578,7 +3578,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
 }
 
-
 // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
 // track these properities for preservation. Note that we can decompose
 // equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *

>From 1909cd927e25d6507b0960cc6b0e92843ac2d8f2 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 27 Jun 2025 08:44:17 -0700
Subject: [PATCH 8/8] Introduce reassociateDisjointOr

Change-Id: I172be21fe78361f4520a893d6c97c422accbf13f
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 203 +++++++++---------
 .../InstCombine/InstCombineInternal.h         |   7 +-
 .../test/Transforms/InstCombine/or-bitmask.ll |  54 +++++
 3 files changed, 165 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8fb4c1e31fa5b..e17572733f69b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2365,8 +2365,7 @@ static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp,
 /// number of and/or instructions might have to be created.
 Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
                                                  Instruction &I, bool IsAnd,
-                                                 bool RHSIsLogical,
-                                                 bool RHSIsDisjoint) {
+                                                 bool RHSIsLogical) {
   Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or;
 
   // LHS bop (X lop Y) --> (LHS bop X) lop Y
@@ -2380,39 +2379,6 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
     return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res)
                         : Builder.CreateBinOp(Opcode, X, Res);
 
-  if (RHSIsDisjoint && !IsAnd && cast<PossiblyDisjointInst>(&I)->isDisjoint()) {
-    if (Value *Res = foldDisjointOr(LHS, X, I)) {
-      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
-      Disjoint->setIsDisjoint(true);
-      return cast<Value>(Disjoint);
-    }
-    if (Value *Res = foldDisjointOr(LHS, Y, I)) {
-      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
-      Disjoint->setIsDisjoint(true);
-      return cast<Value>(Disjoint);
-    }
-    Value *X1, *Y1;
-    if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1))))) {
-      auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
-                                Value *Rem1) -> Value * {
-        if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
-          auto Disjoint =
-              cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
-          Disjoint->setIsDisjoint(true);
-          auto Disjoint2 =
-              cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
-          return cast<Value>(Disjoint2);
-        }
-        return nullptr;
-      };
-
-      if (Value *Res = TryFold(X, X1, Y, Y1))
-        return Res;
-
-      if (Value *Res = TryFold(X, Y1, Y, X1))
-        return Res;
-    }
-  }
   return nullptr;
 }
 
@@ -3578,6 +3544,56 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
 }
 
+/// If IsLogical is true, then the and/or is in select form and the transform
+/// must be poison-safe.
+Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
+                                          Instruction &I, bool IsAnd,
+                                          bool IsLogical) {
+  if (!LHS->getType()->isIntOrIntVectorTy(1))
+    return nullptr;
+
+  // handle (roughly):
+  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
+                                        SQ.getWithInstruction(&I)))
+    return V;
+
+  if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+    if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
+      if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
+        return Res;
+
+  if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+    if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
+      if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
+        return Res;
+
+  if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
+    return Res;
+
+  return nullptr;
+}
+
+static Value *foldOrOfInversions(BinaryOperator &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  assert(I.getOpcode() == Instruction::Or &&
+         "Simplification only supports or at the moment.");
+
+  Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
+  if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
+      !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
+    return nullptr;
+
+  // Check if any two pairs of the and operations are inversions of each other.
+  if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
+    return Builder.CreateXor(Cmp1, Cmp4);
+  if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
+    return Builder.CreateXor(Cmp1, Cmp3);
+
+  return nullptr;
+}
+
 // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
 // track these properities for preservation. Note that we can decompose
 // equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3680,63 +3696,73 @@ static Value *foldBitmaskMul(Value *Op0, Value *Op1,
   return nullptr;
 }
 
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
 Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
                                         Instruction &I) {
-  if (Value *V = foldBitmaskMul(LHS, RHS, Builder))
-    return V;
+  if (Value *Res = foldBitmaskMul(LHS, RHS, Builder)) {
+    return Res;
+  }
 
   return nullptr;
 }
 
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
-Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
-                                          Instruction &I, bool IsAnd,
-                                          bool IsLogical) {
-  if (!LHS->getType()->isIntOrIntVectorTy(1))
-    return nullptr;
-
-  // handle (roughly):
-  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
-  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
-  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
-                                        SQ.getWithInstruction(&I)))
-    return V;
-
-  if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
-    if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
-      if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
-        return Res;
+Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS,
+                                               Instruction &I) {
 
-  if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
-    if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
-      if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
-        return Res;
+  Value *X, *Y;
+  if (match(RHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+    if (Value *Res = foldDisjointOr(LHS, X, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+    if (Value *Res = foldDisjointOr(LHS, Y, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+  }
 
-  if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
-    return Res;
+  if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+    if (Value *Res = foldDisjointOr(X, RHS, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+    if (Value *Res = foldDisjointOr(Y, RHS, I)) {
+      auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+      Disjoint->setIsDisjoint(true);
+      return cast<Value>(Disjoint);
+    }
+  }
 
-  return nullptr;
-}
+  Value *X1, *Y1;
+  if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y)))) &&
+      (match(RHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1)))))) {
+    auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
+                              Value *Rem1) -> Value * {
+      if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
+        auto Disjoint =
+            cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
+        Disjoint->setIsDisjoint(true);
+        auto Disjoint2 =
+            cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
+        return cast<Value>(Disjoint2);
+      }
+      return nullptr;
+    };
 
-static Value *foldOrOfInversions(BinaryOperator &I,
-                                 InstCombiner::BuilderTy &Builder) {
-  assert(I.getOpcode() == Instruction::Or &&
-         "Simplification only supports or at the moment.");
+    if (Value *Res = TryFold(X, X1, Y, Y1))
+      return Res;
 
-  Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
-  if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
-      !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
-    return nullptr;
+    if (Value *Res = TryFold(X, Y1, Y, X1))
+      return Res;
 
-  // Check if any two pairs of the and operations are inversions of each other.
-  if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
-    return Builder.CreateXor(Cmp1, Cmp4);
-  if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
-    return Builder.CreateXor(Cmp1, Cmp3);
+    if (Value *Res = TryFold(Y, X1, X, Y1))
+      return Res;
 
+    if (Value *Res = TryFold(Y, Y1, X, X1))
+      return Res;
+  }
   return nullptr;
 }
 
@@ -3825,23 +3851,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
       return replaceInstUsesWith(I, Res);
 
-    Value *X, *Y;
-    if (match(I.getOperand(1),
-              m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
-      if (auto Res = reassociateBooleanAndOr(
-              I.getOperand(0), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
-              /*RHSIsDisjoint*/ true)) {
-        return replaceInstUsesWith(I, Res);
-      }
-    }
-    if (match(I.getOperand(0),
-              m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
-      if (auto Res = reassociateBooleanAndOr(
-              I.getOperand(1), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
-              /*RHSIsDisjoint*/ true)) {
-        return replaceInstUsesWith(I, Res);
-      }
-    }
+    if (Value *Res = reassociateDisjointOr(I.getOperand(0), I.getOperand(1), I))
+      return replaceInstUsesWith(I, Res);
   }
 
   Value *X, *Y;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 60fbcc4575ff9..831d5b2906425 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -431,11 +431,12 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Value *foldBooleanAndOr(Value *LHS, Value *RHS, Instruction &I, bool IsAnd,
                           bool IsLogical);
 
+  Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
+                                 bool IsAnd, bool RHSIsLogical);
+
   Value *foldDisjointOr(Value *LHS, Value *RHS, Instruction &I);
 
-  Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
-                                 bool IsAnd, bool RHSIsLogical,
-                                 bool RHSIsDisjoint = false);
+  Value *reassociateDisjointOr(Value *LHS, Value *RHS, Instruction &I);
 
   Instruction *
   canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 753ec3507fa4e..d54a75fdd1e14 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -559,6 +559,60 @@ define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
   ret i32 %out
 }
 
+define i32 @unrelated_ops6(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops6(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %in3, %temp
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %in2, %temp2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @unrelated_ops7(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops7(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %in3, %temp
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %temp2, %in2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
+define i32 @unrelated_ops8(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops8(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %1 = and i32 %in, 3
+  %temp = mul nuw nsw i32 %1, 72
+  %temp3 = or disjoint i32 %temp, %in3
+  %2 = and i32 %in, 12
+  %temp2 = mul nuw nsw i32 %2, 72
+  %temp4 = or disjoint i32 %temp2, %in2
+  %out = or disjoint i32 %temp3, %temp4
+  ret i32 %out
+}
+
 define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
 ; CHECK-LABEL: @unrelated_ops_nocombine(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IN:%.*]], 3



More information about the llvm-commits mailing list