[llvm] [InstCombine] Extend bitmask mul combine to handle independent operands (PR #142503)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 08:46:18 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/142503
>From 8545b3b1cedd87f9eed610b2375cc3386406345c Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 2 Jun 2025 12:29:39 -0700
Subject: [PATCH 01/12] [InstCombine] Extend bitmask mul combine to handle
independent operands
Change-Id: Ife1a010d2ae6df40549a6c73f7b893948befa3be
---
.../InstCombine/InstCombineAndOrXor.cpp | 92 +++++++++++++++----
.../test/Transforms/InstCombine/or-bitmask.ll | 50 ++++++++++
2 files changed, 123 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dd16cfaeecd45..0c58c72106366 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3602,6 +3602,11 @@ struct DecomposedBitMaskMul {
APInt Mask;
bool NUW;
bool NSW;
+
+ bool isCombineableWith(DecomposedBitMaskMul Other) {
+ return X == Other.X && (Mask & Other.Mask).isZero() &&
+ Factor == Other.Factor;
+ }
};
static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
@@ -3659,6 +3664,34 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
return std::nullopt;
}
+using CombinedBitmaskMul =
+ std::pair<std::optional<DecomposedBitMaskMul>, Value *>;
+
+static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
+ auto DecompBitMaskMul = matchBitmaskMul(V);
+ if (DecompBitMaskMul)
+ return {DecompBitMaskMul, nullptr};
+
+ // Otherwise, check the operands of V for bitmaskmul pattern
+ auto BOp = dyn_cast<BinaryOperator>(V);
+ if (!BOp)
+ return {std::nullopt, nullptr};
+
+ auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
+ if (!Disj || !Disj->isDisjoint())
+ return {std::nullopt, nullptr};
+
+ auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
+ if (DecompBitMaskMul0)
+ return {DecompBitMaskMul0, BOp->getOperand(1)};
+
+ auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
+ if (DecompBitMaskMul1)
+ return {DecompBitMaskMul1, BOp->getOperand(0)};
+
+ return {std::nullopt, nullptr};
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -3741,25 +3774,46 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*NSW=*/true, /*NUW=*/true))
return R;
- // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
- // This also accepts the equivalent select form of (A & N) * C
- // expressions i.e. !(A & N) ? 0 : N * C)
- auto Decomp1 = matchBitmaskMul(I.getOperand(1));
- if (Decomp1) {
- auto Decomp0 = matchBitmaskMul(I.getOperand(0));
- if (Decomp0 && Decomp0->X == Decomp1->X &&
- (Decomp0->Mask & Decomp1->Mask).isZero() &&
- Decomp0->Factor == Decomp1->Factor) {
-
- Value *NewAnd = Builder.CreateAnd(
- Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
- (Decomp0->Mask + Decomp1->Mask)));
-
- auto *Combined = BinaryOperator::CreateMul(
- NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor));
-
- Combined->setHasNoUnsignedWrap(Decomp0->NUW && Decomp1->NUW);
- Combined->setHasNoSignedWrap(Decomp0->NSW && Decomp1->NSW);
+ // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
+ // This also accepts the equivalent mul form of (A & N) ? 0 : N * C)
+ // expressions i.e. (A & N) * C
+ CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
+ auto BMDecomp1 = Decomp1.first;
+
+ if (BMDecomp1) {
+ CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
+ auto BMDecomp0 = Decomp0.first;
+
+ if (BMDecomp0 && BMDecomp0->isCombineableWith(*BMDecomp1)) {
+ auto NewAnd = Builder.CreateAnd(
+ BMDecomp0->X,
+ ConstantInt::get(BMDecomp0->X->getType(),
+ (BMDecomp0->Mask + BMDecomp1->Mask)));
+
+ BinaryOperator *Combined = cast<BinaryOperator>(Builder.CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor)));
+
+ Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
+ Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+
+ // If our tree has indepdent or-disjoint operands, bring them in.
+ auto OtherOp0 = Decomp0.second;
+ auto OtherOp1 = Decomp1.second;
+
+ if (OtherOp0 || OtherOp1) {
+ Value *OtherOp;
+ if (OtherOp0 && OtherOp1) {
+ OtherOp = Builder.CreateOr(OtherOp0, OtherOp1);
+ cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
+ } else {
+ OtherOp = OtherOp0 ? OtherOp0 : OtherOp1;
+ }
+ Combined = cast<BinaryOperator>(Builder.CreateOr(Combined, OtherOp));
+ cast<PossiblyDisjointInst>(Combined)->setIsDisjoint(true);
+ }
+
+ // Caller expects detached instruction
+ Combined->removeFromParent();
return Combined;
}
}
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 3c992dfea569a..0976b76542f49 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -451,6 +451,56 @@ define i32 @and_mul_non_disjoint(i32 %in) {
ret i32 %out
}
+define i32 @unrelated_ops(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp3 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp, %temp3
+ ret i32 %out
+}
+
+define i32 @unrelated_ops1(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ ret i32 %out
+}
+
+define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CONSTSPLAT: {{.*}}
; CONSTVEC: {{.*}}
>From 09fb3f329e9712d352dec32b7f2f1fef1ec9c163 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 12 Jun 2025 08:50:01 -0700
Subject: [PATCH 02/12] Fix comment from bad merge
Change-Id: I879acdf0b17a7110286c6c375410300611c468eb
---
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0c58c72106366..7e6a8dbfa59c2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3774,9 +3774,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*NSW=*/true, /*NUW=*/true))
return R;
- // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
- // This also accepts the equivalent mul form of (A & N) ? 0 : N * C)
- // expressions i.e. (A & N) * C
+ // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
+ // This also accepts the equivalent select form of (A & N) * C
+ // expressions i.e. !(A & N) ? 0 : N * C)
CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
auto BMDecomp1 = Decomp1.first;
>From 41911d58d2338f3d547171a831055c3faba5dc60 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 23 Jun 2025 11:16:10 -0700
Subject: [PATCH 03/12] Reassociate instead of combine
Change-Id: Ib86e8ed347ef60948c3e4cb44c5fab1c3667afc6
---
.../InstCombine/InstCombineAndOrXor.cpp | 76 ++++++++++---------
.../test/Transforms/InstCombine/or-bitmask.ll | 59 +++++++++++++-
2 files changed, 98 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7e6a8dbfa59c2..dbaafa9791930 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3664,32 +3664,35 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
return std::nullopt;
}
-using CombinedBitmaskMul =
- std::pair<std::optional<DecomposedBitMaskMul>, Value *>;
+struct CombinedBitmaskMul {
+ std::optional<DecomposedBitMaskMul> Decomp = std::nullopt;
+ Value *DecompOp = nullptr;
+ Value *OtherOp = nullptr;
+};
static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
auto DecompBitMaskMul = matchBitmaskMul(V);
if (DecompBitMaskMul)
- return {DecompBitMaskMul, nullptr};
+ return {DecompBitMaskMul, V, nullptr};
// Otherwise, check the operands of V for bitmaskmul pattern
auto BOp = dyn_cast<BinaryOperator>(V);
if (!BOp)
- return {std::nullopt, nullptr};
+ return CombinedBitmaskMul();
auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
if (!Disj || !Disj->isDisjoint())
- return {std::nullopt, nullptr};
+ return CombinedBitmaskMul();
auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
if (DecompBitMaskMul0)
- return {DecompBitMaskMul0, BOp->getOperand(1)};
+ return {DecompBitMaskMul0, BOp->getOperand(0), BOp->getOperand(1)};
auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
if (DecompBitMaskMul1)
- return {DecompBitMaskMul1, BOp->getOperand(0)};
+ return {DecompBitMaskMul1, BOp->getOperand(1), BOp->getOperand(0)};
- return {std::nullopt, nullptr};
+ return CombinedBitmaskMul();
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -3778,43 +3781,44 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// This also accepts the equivalent select form of (A & N) * C
// expressions i.e. !(A & N) ? 0 : N * C)
CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
- auto BMDecomp1 = Decomp1.first;
+ auto BMDecomp1 = Decomp1.Decomp;
if (BMDecomp1) {
CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
- auto BMDecomp0 = Decomp0.first;
-
- if (BMDecomp0 && BMDecomp0->isCombineableWith(*BMDecomp1)) {
- auto NewAnd = Builder.CreateAnd(
- BMDecomp0->X,
- ConstantInt::get(BMDecomp0->X->getType(),
- (BMDecomp0->Mask + BMDecomp1->Mask)));
-
- BinaryOperator *Combined = cast<BinaryOperator>(Builder.CreateMul(
- NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor)));
+ auto BMDecomp0 = Decomp0.Decomp;
- Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
- Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+ if (BMDecomp0) {
+ // If we have independent operands in the BitmaskMul chain, then just
+ // reassociate to encourage combining in future iterations.
+ if (Decomp0.OtherOp || Decomp1.OtherOp) {
+ Value *OtherOp = Decomp0.OtherOp ? Decomp0.OtherOp : Decomp1.OtherOp;
- // If our tree has indepdent or-disjoint operands, bring them in.
- auto OtherOp0 = Decomp0.second;
- auto OtherOp1 = Decomp1.second;
-
- if (OtherOp0 || OtherOp1) {
- Value *OtherOp;
- if (OtherOp0 && OtherOp1) {
- OtherOp = Builder.CreateOr(OtherOp0, OtherOp1);
+ if (Decomp0.OtherOp && Decomp1.OtherOp) {
+ OtherOp = Builder.CreateOr(Decomp0.OtherOp, Decomp1.OtherOp);
cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
- } else {
- OtherOp = OtherOp0 ? OtherOp0 : OtherOp1;
}
- Combined = cast<BinaryOperator>(Builder.CreateOr(Combined, OtherOp));
- cast<PossiblyDisjointInst>(Combined)->setIsDisjoint(true);
+
+ auto CombinedOp =
+ Builder.CreateOr(Decomp0.DecompOp, Decomp1.DecompOp);
+ cast<PossiblyDisjointInst>(CombinedOp)->setIsDisjoint(true);
+
+ return BinaryOperator::CreateDisjointOr(CombinedOp, OtherOp);
}
- // Caller expects detached instruction
- Combined->removeFromParent();
- return Combined;
+ if (BMDecomp0->isCombineableWith(*BMDecomp1)) {
+ auto NewAnd = Builder.CreateAnd(
+ BMDecomp0->X,
+ ConstantInt::get(BMDecomp0->X->getType(),
+ (BMDecomp0->Mask + BMDecomp1->Mask)));
+
+ auto *Combined = BinaryOperator::CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor));
+
+ Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
+ Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+
+ return Combined;
+ }
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 0976b76542f49..d3758f6c51963 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -485,9 +485,9 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops2(
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
@@ -501,6 +501,63 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
ret i32 %out
}
+define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops_nocombine(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 7
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 7
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops_nocombine1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 36
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 36
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @no_chain(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @no_chain(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[IN2:%.*]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %out = or disjoint i32 %temp3, %in2
+ ret i32 %out
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CONSTSPLAT: {{.*}}
; CONSTVEC: {{.*}}
>From 495d32e99fe8e0a25618faafd749e5df50756781 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 24 Jun 2025 09:07:33 -0700
Subject: [PATCH 04/12] Default constructor for std::optional
Change-Id: I950ee32ec053430fd51c7fd52645fe52e9e6ecff
---
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dbaafa9791930..903ab1d8c7d34 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3665,7 +3665,7 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
}
struct CombinedBitmaskMul {
- std::optional<DecomposedBitMaskMul> Decomp = std::nullopt;
+ std::optional<DecomposedBitMaskMul> Decomp;
Value *DecompOp = nullptr;
Value *OtherOp = nullptr;
};
>From 75ec1d725e781ea7d88852c522b35cfb341a770f Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 25 Jun 2025 11:04:57 -0700
Subject: [PATCH 05/12] Add tests for the and-icmp-sel form
Change-Id: I875b9fac4749b3f391efce47f8d3b9e2004de8c2
---
.../test/Transforms/InstCombine/or-bitmask.ll | 58 +++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index d3758f6c51963..c59e598ba6daa 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -501,6 +501,64 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
ret i32 %out
}
+define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops3(
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TEMP:%.*]], [[IN3:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP2]], [[TEMP3]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 2
+ %cmp = icmp eq i32 %1, 0
+ %temp = select i1 %cmp, i32 0, i32 144
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops4(
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i32 [[IN2:%.*]], [[IN3:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 12
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %2 = and i32 %in, 2
+ %cmp = icmp eq i32 %2, 0
+ %temp2 = select i1 %cmp, i32 0, i32 144
+ %temp4 = or disjoint i32 %temp2, %in3
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops5(
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 2
+ %cmp = icmp eq i32 %1, 0
+ %temp = select i1 %cmp, i32 0, i32 144
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 4
+ %cmp2 = icmp eq i32 %2, 0
+ %temp2 = select i1 %cmp2, i32 0, i32 288
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops_nocombine(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
>From 1c0d464cb2628a4fa603c8c5a55f9b94cce65970 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 26 Jun 2025 14:20:25 -0700
Subject: [PATCH 06/12] Refactor for reassociation
Change-Id: Ie86d0e58f7fdb2c0489d3dee3a41ef3911f9477b
---
.../InstCombine/InstCombineAndOrXor.cpp | 250 ++++++++++--------
.../InstCombine/InstCombineInternal.h | 5 +-
.../test/Transforms/InstCombine/or-bitmask.ll | 24 +-
3 files changed, 152 insertions(+), 127 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 903ab1d8c7d34..c851231bdb7a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2365,8 +2365,10 @@ static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp,
/// number of and/or instructions might have to be created.
Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
Instruction &I, bool IsAnd,
- bool RHSIsLogical) {
+ bool RHSIsLogical,
+ bool RHSIsDisjoint) {
Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or;
+
// LHS bop (X lop Y) --> (LHS bop X) lop Y
// LHS bop (X bop Y) --> (LHS bop X) bop Y
if (Value *Res = foldBooleanAndOr(LHS, X, I, IsAnd, /*IsLogical=*/false))
@@ -2377,6 +2379,40 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
if (Value *Res = foldBooleanAndOr(LHS, Y, I, IsAnd, /*IsLogical=*/false))
return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res)
: Builder.CreateBinOp(Opcode, X, Res);
+
+ if (RHSIsDisjoint && !IsAnd && cast<PossiblyDisjointInst>(&I)->isDisjoint()) {
+ if (Value *Res = foldDisjointOr(LHS, X, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ if (Value *Res = foldDisjointOr(LHS, Y, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ Value *X1, *Y1;
+ if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1))))) {
+ auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
+ Value *Rem1) -> Value * {
+ if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
+ auto Disjoint =
+ cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
+ Disjoint->setIsDisjoint(true);
+ auto Disjoint2 =
+ cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
+ return cast<Value>(Disjoint2);
+ }
+ return nullptr;
+ };
+
+ if (Value *Res = TryFold(X, X1, Y, Y1))
+ return Res;
+
+ if (Value *Res = TryFold(X, Y1, Y, X1))
+ return Res;
+ }
+ }
return nullptr;
}
@@ -3542,55 +3578,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
-Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
- Instruction &I, bool IsAnd,
- bool IsLogical) {
- if (!LHS->getType()->isIntOrIntVectorTy(1))
- return nullptr;
-
- // handle (roughly):
- // (icmp ne (A & B), C) | (icmp ne (A & D), E)
- // (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
- SQ.getWithInstruction(&I)))
- return V;
-
- if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
- if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
- if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
- return Res;
-
- if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
- if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
- if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
- return Res;
-
- if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
- return Res;
-
- return nullptr;
-}
-
-static Value *foldOrOfInversions(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
- assert(I.getOpcode() == Instruction::Or &&
- "Simplification only supports or at the moment.");
-
- Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
- if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
- !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
- return nullptr;
-
- // Check if any two pairs of the and operations are inversions of each other.
- if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
- return Builder.CreateXor(Cmp1, Cmp4);
- if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
- return Builder.CreateXor(Cmp1, Cmp3);
-
- return nullptr;
-}
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
@@ -3664,35 +3651,94 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
return std::nullopt;
}
-struct CombinedBitmaskMul {
- std::optional<DecomposedBitMaskMul> Decomp;
- Value *DecompOp = nullptr;
- Value *OtherOp = nullptr;
-};
+// (A & N) * C + (A & M) * C -> (A & (N + M)) * C, when N & M == 0.
+// This also accepts the equivalent select form of (A & N) * C
+// expressions, i.e. (!(A & N) ? 0 : N * C).
+static Value *foldBitmaskMul(Value *Op0, Value *Op1,
+ InstCombiner::BuilderTy &Builder) {
+ auto Decomp1 = matchBitmaskMul(Op1);
-static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
- auto DecompBitMaskMul = matchBitmaskMul(V);
- if (DecompBitMaskMul)
- return {DecompBitMaskMul, V, nullptr};
+ if (Decomp1) {
+ auto Decomp0 = matchBitmaskMul(Op0);
- // Otherwise, check the operands of V for bitmaskmul pattern
- auto BOp = dyn_cast<BinaryOperator>(V);
- if (!BOp)
- return CombinedBitmaskMul();
+ if (Decomp0) {
+ // Combine only when both decompositions share X and Factor and their
+ // masks do not overlap; the merged mask is then the sum of the two.
- auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
- if (!Disj || !Disj->isDisjoint())
- return CombinedBitmaskMul();
+ if (Decomp0->isCombineableWith(*Decomp1)) {
+ auto NewAnd = Builder.CreateAnd(
+ Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
+ (Decomp0->Mask + Decomp1->Mask)));
- auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
- if (DecompBitMaskMul0)
- return {DecompBitMaskMul0, BOp->getOperand(0), BOp->getOperand(1)};
+ auto Res = Builder.CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor), "",
+ Decomp0->NUW && Decomp1->NUW, Decomp0->NSW && Decomp1->NSW);
+ return Res;
+ }
+ }
+ }
- auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
- if (DecompBitMaskMul1)
- return {DecompBitMaskMul1, BOp->getOperand(1), BOp->getOperand(0)};
+ return nullptr;
+}
+
+/// Try to fold \p LHS and \p RHS, the operands of a disjoint or, into one
+/// value. Only foldBitmaskMul is attempted; returns nullptr if it fails.
+Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
+ Instruction &I) {
+ if (Value *V = foldBitmaskMul(LHS, RHS, Builder))
+ return V;
- return CombinedBitmaskMul();
+ return nullptr;
+}
+
+/// If IsLogical is true, then the and/or is in select form and the transform
+/// must be poison-safe.
+Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
+ Instruction &I, bool IsAnd,
+ bool IsLogical) {
+ if (!LHS->getType()->isIntOrIntVectorTy(1))
+ return nullptr;
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
+ SQ.getWithInstruction(&I)))
+ return V;
+
+ if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+ if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
+ if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
+ return Res;
+
+ if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+ if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
+ if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
+ return Res;
+
+ if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
+ return Res;
+
+ return nullptr;
+}
+
+static Value *foldOrOfInversions(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or &&
+ "Simplification only supports or at the moment.");
+
+ Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
+ if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
+ !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
+ return nullptr;
+
+ // Check if any two pairs of the and operations are inversions of each other.
+ if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
+ return Builder.CreateXor(Cmp1, Cmp4);
+ if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
+ return Builder.CreateXor(Cmp1, Cmp3);
+
+ return nullptr;
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -3777,48 +3823,24 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*NSW=*/true, /*NUW=*/true))
return R;
- // (A & N) * C + (A & M) * C -> (A & (N + M)) & C
- // This also accepts the equivalent select form of (A & N) * C
- // expressions i.e. !(A & N) ? 0 : N * C)
- CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
- auto BMDecomp1 = Decomp1.Decomp;
-
- if (BMDecomp1) {
- CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
- auto BMDecomp0 = Decomp0.Decomp;
-
- if (BMDecomp0) {
- // If we have independent operands in the BitmaskMul chain, then just
- // reassociate to encourage combining in future iterations.
- if (Decomp0.OtherOp || Decomp1.OtherOp) {
- Value *OtherOp = Decomp0.OtherOp ? Decomp0.OtherOp : Decomp1.OtherOp;
-
- if (Decomp0.OtherOp && Decomp1.OtherOp) {
- OtherOp = Builder.CreateOr(Decomp0.OtherOp, Decomp1.OtherOp);
- cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
- }
-
- auto CombinedOp =
- Builder.CreateOr(Decomp0.DecompOp, Decomp1.DecompOp);
- cast<PossiblyDisjointInst>(CombinedOp)->setIsDisjoint(true);
-
- return BinaryOperator::CreateDisjointOr(CombinedOp, OtherOp);
- }
-
- if (BMDecomp0->isCombineableWith(*BMDecomp1)) {
- auto NewAnd = Builder.CreateAnd(
- BMDecomp0->X,
- ConstantInt::get(BMDecomp0->X->getType(),
- (BMDecomp0->Mask + BMDecomp1->Mask)));
-
- auto *Combined = BinaryOperator::CreateMul(
- NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor));
-
- Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
- Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+ if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
+ return replaceInstUsesWith(I, Res);
- return Combined;
- }
+ Value *X, *Y;
+ if (match(I.getOperand(1),
+ m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+ if (auto Res = reassociateBooleanAndOr(
+ I.getOperand(0), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
+ /*RHSIsDisjoint*/ true)) {
+ return replaceInstUsesWith(I, Res);
+ }
+ }
+ if (match(I.getOperand(0),
+ m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+ if (auto Res = reassociateBooleanAndOr(
+ I.getOperand(1), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
+ /*RHSIsDisjoint*/ true)) {
+ return replaceInstUsesWith(I, Res);
}
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9adde8094d44d..12a94b261e98e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -436,8 +436,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Value *foldBooleanAndOr(Value *LHS, Value *RHS, Instruction &I, bool IsAnd,
bool IsLogical);
+ Value *foldDisjointOr(Value *LHS, Value *RHS, Instruction &I);
+
Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
- bool IsAnd, bool RHSIsLogical);
+ bool IsAnd, bool RHSIsLogical,
+ bool RHSIsDisjoint = false);
Instruction *
canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index c59e598ba6daa..753ec3507fa4e 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -485,10 +485,10 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops2(
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%1 = and i32 %in, 3
@@ -503,10 +503,10 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops3(
-; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TEMP:%.*]], [[IN3:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP2]], [[TEMP3]]
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TEMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%1 = and i32 %in, 2
@@ -522,10 +522,10 @@ define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops4(
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i32 [[IN2:%.*]], [[IN3:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%1 = and i32 %in, 12
@@ -541,10 +541,10 @@ define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops5(
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%1 = and i32 %in, 2
@@ -563,10 +563,10 @@ define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops_nocombine(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 7
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP2]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
@@ -584,10 +584,10 @@ define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops_nocombine1(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 36
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[TEMP]], [[TEMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP2]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
>From ba68836a98f8ce6dd50ced49aac6147934e01481 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 26 Jun 2025 17:32:52 -0700
Subject: [PATCH 07/12] Formatting
Change-Id: I2c418b8e5bf7fed050ee77515a73fa4368a1ea7d
---
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c851231bdb7a4..6c8636ea2a1eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3578,7 +3578,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
-
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
>From d2bd64279004f5c23a97bfcec92b59058508c750 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 27 Jun 2025 08:44:17 -0700
Subject: [PATCH 08/12] Introduce reassociateDisjointOr
Change-Id: I172be21fe78361f4520a893d6c97c422accbf13f
---
.../InstCombine/InstCombineAndOrXor.cpp | 203 +++++++++---------
.../InstCombine/InstCombineInternal.h | 7 +-
.../test/Transforms/InstCombine/or-bitmask.ll | 54 +++++
3 files changed, 165 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6c8636ea2a1eb..1dd4b5aea79c9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2365,8 +2365,7 @@ static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp,
/// number of and/or instructions might have to be created.
Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
Instruction &I, bool IsAnd,
- bool RHSIsLogical,
- bool RHSIsDisjoint) {
+ bool RHSIsLogical) {
Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or;
// LHS bop (X lop Y) --> (LHS bop X) lop Y
@@ -2380,39 +2379,6 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res)
: Builder.CreateBinOp(Opcode, X, Res);
- if (RHSIsDisjoint && !IsAnd && cast<PossiblyDisjointInst>(&I)->isDisjoint()) {
- if (Value *Res = foldDisjointOr(LHS, X, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
- if (Value *Res = foldDisjointOr(LHS, Y, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
- Value *X1, *Y1;
- if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1))))) {
- auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
- Value *Rem1) -> Value * {
- if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
- auto Disjoint =
- cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
- Disjoint->setIsDisjoint(true);
- auto Disjoint2 =
- cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
- return cast<Value>(Disjoint2);
- }
- return nullptr;
- };
-
- if (Value *Res = TryFold(X, X1, Y, Y1))
- return Res;
-
- if (Value *Res = TryFold(X, Y1, Y, X1))
- return Res;
- }
- }
return nullptr;
}
@@ -3578,6 +3544,56 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
+/// If IsLogical is true, then the and/or is in select form and the transform
+/// must be poison-safe.
+Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
+ Instruction &I, bool IsAnd,
+ bool IsLogical) {
+ if (!LHS->getType()->isIntOrIntVectorTy(1))
+ return nullptr;
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
+ SQ.getWithInstruction(&I)))
+ return V;
+
+ if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
+ if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
+ if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
+ return Res;
+
+ if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
+ if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
+ if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
+ return Res;
+
+ if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
+ return Res;
+
+ return nullptr;
+}
+
+static Value *foldOrOfInversions(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or &&
+ "Simplification only supports or at the moment.");
+
+ Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
+ if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
+ !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
+ return nullptr;
+
+ // Check if any two pairs of the and operations are inversions of each other.
+ if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
+ return Builder.CreateXor(Cmp1, Cmp4);
+ if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
+ return Builder.CreateXor(Cmp1, Cmp3);
+
+ return nullptr;
+}
+
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3680,63 +3696,73 @@ static Value *foldBitmaskMul(Value *Op0, Value *Op1,
return nullptr;
}
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
Instruction &I) {
- if (Value *V = foldBitmaskMul(LHS, RHS, Builder))
- return V;
+ if (Value *Res = foldBitmaskMul(LHS, RHS, Builder)) {
+ return Res;
+ }
return nullptr;
}
-/// If IsLogical is true, then the and/or is in select form and the transform
-/// must be poison-safe.
-Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
- Instruction &I, bool IsAnd,
- bool IsLogical) {
- if (!LHS->getType()->isIntOrIntVectorTy(1))
- return nullptr;
-
- // handle (roughly):
- // (icmp ne (A & B), C) | (icmp ne (A & D), E)
- // (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder,
- SQ.getWithInstruction(&I)))
- return V;
-
- if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
- if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS))
- if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical))
- return Res;
+Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS,
+ Instruction &I) {
- if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS))
- if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS))
- if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical))
- return Res;
+ Value *X, *Y;
+ if (match(RHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+ if (Value *Res = foldDisjointOr(LHS, X, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ if (Value *Res = foldDisjointOr(LHS, Y, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ }
- if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
- return Res;
+ if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
+ if (Value *Res = foldDisjointOr(X, RHS, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ if (Value *Res = foldDisjointOr(Y, RHS, I)) {
+ auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
+ Disjoint->setIsDisjoint(true);
+ return cast<Value>(Disjoint);
+ }
+ }
- return nullptr;
-}
+ Value *X1, *Y1;
+ if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y)))) &&
+ (match(RHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1)))))) {
+ auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
+ Value *Rem1) -> Value * {
+ if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
+ auto Disjoint =
+ cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
+ Disjoint->setIsDisjoint(true);
+ auto Disjoint2 =
+ cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
+ return cast<Value>(Disjoint2);
+ }
+ return nullptr;
+ };
-static Value *foldOrOfInversions(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
- assert(I.getOpcode() == Instruction::Or &&
- "Simplification only supports or at the moment.");
+ if (Value *Res = TryFold(X, X1, Y, Y1))
+ return Res;
- Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
- if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
- !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
- return nullptr;
+ if (Value *Res = TryFold(X, Y1, Y, X1))
+ return Res;
- // Check if any two pairs of the and operations are inversions of each other.
- if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
- return Builder.CreateXor(Cmp1, Cmp4);
- if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
- return Builder.CreateXor(Cmp1, Cmp3);
+ if (Value *Res = TryFold(Y, X1, X, Y1))
+ return Res;
+ if (Value *Res = TryFold(Y, Y1, X, X1))
+ return Res;
+ }
return nullptr;
}
@@ -3825,23 +3851,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
return replaceInstUsesWith(I, Res);
- Value *X, *Y;
- if (match(I.getOperand(1),
- m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
- if (auto Res = reassociateBooleanAndOr(
- I.getOperand(0), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
- /*RHSIsDisjoint*/ true)) {
- return replaceInstUsesWith(I, Res);
- }
- }
- if (match(I.getOperand(0),
- m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
- if (auto Res = reassociateBooleanAndOr(
- I.getOperand(1), X, Y, I, /*IsAnd=*/false, /*RHSIsLogical=*/true,
- /*RHSIsDisjoint*/ true)) {
- return replaceInstUsesWith(I, Res);
- }
- }
+ if (Value *Res = reassociateDisjointOr(I.getOperand(0), I.getOperand(1), I))
+ return replaceInstUsesWith(I, Res);
}
Value *X, *Y;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 12a94b261e98e..751bf89363efc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -436,11 +436,12 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Value *foldBooleanAndOr(Value *LHS, Value *RHS, Instruction &I, bool IsAnd,
bool IsLogical);
+ Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
+ bool IsAnd, bool RHSIsLogical);
+
Value *foldDisjointOr(Value *LHS, Value *RHS, Instruction &I);
- Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
- bool IsAnd, bool RHSIsLogical,
- bool RHSIsDisjoint = false);
+ Value *reassociateDisjointOr(Value *LHS, Value *RHS, Instruction &I);
Instruction *
canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 753ec3507fa4e..d54a75fdd1e14 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -559,6 +559,60 @@ define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
ret i32 %out
}
+define i32 @unrelated_ops6(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops6(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %in3, %temp
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @unrelated_ops7(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops7(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %in3, %temp
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %temp2, %in2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
+define i32 @unrelated_ops8(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops8(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %temp2, %in2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops_nocombine(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
>From c1d867c468de415f61073d077d0961c1fc885bdc Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 1 Jul 2025 07:56:15 -0700
Subject: [PATCH 09/12] Review comments
Change-Id: I4937190c734c5adcdd73d81278d82a53aec4c4c9
---
.../InstCombine/InstCombineAndOrXor.cpp | 57 ++++------
.../InstCombine/InstCombineInternal.h | 4 +-
.../test/Transforms/InstCombine/or-bitmask.ll | 106 +++++++++---------
3 files changed, 74 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1dd4b5aea79c9..78ab22dc4c886 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2367,7 +2367,6 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
Instruction &I, bool IsAnd,
bool RHSIsLogical) {
Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or;
-
// LHS bop (X lop Y) --> (LHS bop X) lop Y
// LHS bop (X bop Y) --> (LHS bop X) bop Y
if (Value *Res = foldBooleanAndOr(LHS, X, I, IsAnd, /*IsLogical=*/false))
@@ -3666,9 +3665,9 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
return std::nullopt;
}
-// (A & N) * C + (A & M) * C -> (A & (N + M)) & C
-// This also accepts the equivalent select form of (A & N) * C
-// expressions i.e. !(A & N) ? 0 : N * C)
+/// (A & N) * C + (A & M) * C -> (A & (N + M)) * C
+/// This also accepts the equivalent select form of (A & N) * C
+/// expressions i.e. !(A & N) ? 0 : N * C)
static Value *foldBitmaskMul(Value *Op0, Value *Op1,
InstCombiner::BuilderTy &Builder) {
auto Decomp1 = matchBitmaskMul(Op1);
@@ -3696,8 +3695,7 @@ static Value *foldBitmaskMul(Value *Op0, Value *Op1,
return nullptr;
}
-Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
- Instruction &I) {
+Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS) {
if (Value *Res = foldBitmaskMul(LHS, RHS, Builder)) {
return Res;
}
@@ -3705,48 +3703,31 @@ Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS,
return nullptr;
}
-Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS,
- Instruction &I) {
+Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) {
Value *X, *Y;
if (match(RHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
- if (Value *Res = foldDisjointOr(LHS, X, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
- if (Value *Res = foldDisjointOr(LHS, Y, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
+ if (Value *Res = foldDisjointOr(LHS, X))
+ return Builder.CreateOr(Res, Y, "", /*IsDisjoint=*/true);
+ if (Value *Res = foldDisjointOr(LHS, Y))
+ return Builder.CreateOr(Res, X, "", /*IsDisjoint=*/true);
}
if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))))) {
- if (Value *Res = foldDisjointOr(X, RHS, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, Y));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
- if (Value *Res = foldDisjointOr(Y, RHS, I)) {
- auto Disjoint = cast<PossiblyDisjointInst>(Builder.CreateOr(Res, X));
- Disjoint->setIsDisjoint(true);
- return cast<Value>(Disjoint);
- }
+ if (Value *Res = foldDisjointOr(X, RHS))
+ return Builder.CreateOr(Res, Y, "", /*IsDisjoint=*/true);
+ if (Value *Res = foldDisjointOr(Y, RHS))
+ return Builder.CreateOr(Res, X, "", /*IsDisjoint=*/true);
}
Value *X1, *Y1;
if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y)))) &&
(match(RHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1)))))) {
- auto TryFold = [this, &I](Value *Op0, Value *Op1, Value *Rem0,
- Value *Rem1) -> Value * {
- if (Value *Res = foldDisjointOr(Op0, Op1, I)) {
- auto Disjoint =
- cast<PossiblyDisjointInst>(Builder.CreateOr(Rem0, Rem1));
- Disjoint->setIsDisjoint(true);
- auto Disjoint2 =
- cast<PossiblyDisjointInst>(Builder.CreateOr(Disjoint, Res));
- return cast<Value>(Disjoint2);
+ auto TryFold = [this](Value *Op0, Value *Op1, Value *Rem0,
+ Value *Rem1) -> Value * {
+ if (Value *Res = foldDisjointOr(Op0, Op1)) {
+ auto Disjoint = Builder.CreateOr(Rem0, Rem1, "", /*IsDisjoint=*/true);
+ return Builder.CreateOr(Disjoint, Res, "", /*IsDisjoint=*/true);
}
return nullptr;
};
@@ -3851,7 +3832,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
return replaceInstUsesWith(I, Res);
- if (Value *Res = reassociateDisjointOr(I.getOperand(0), I.getOperand(1), I))
+ if (Value *Res = reassociateDisjointOr(I.getOperand(0), I.getOperand(1)))
return replaceInstUsesWith(I, Res);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 751bf89363efc..1b963952f1d61 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -439,9 +439,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Value *reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, Instruction &I,
bool IsAnd, bool RHSIsLogical);
- Value *foldDisjointOr(Value *LHS, Value *RHS, Instruction &I);
+ Value *foldDisjointOr(Value *LHS, Value *RHS);
- Value *reassociateDisjointOr(Value *LHS, Value *RHS, Instruction &I);
+ Value *reassociateDisjointOr(Value *LHS, Value *RHS);
Instruction *
canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index d54a75fdd1e14..a1fb5d82c6915 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -458,10 +458,10 @@ define i32 @unrelated_ops(i32 %in, i32 %in2) {
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp, %temp3
ret i32 %out
@@ -474,10 +474,10 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
ret i32 %out
@@ -488,14 +488,14 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -506,15 +506,15 @@ define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TEMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TEMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 2
- %cmp = icmp eq i32 %1, 0
+ %and0 = and i32 %in, 2
+ %cmp = icmp eq i32 %and0, 0
%temp = select i1 %cmp, i32 0, i32 144
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -525,14 +525,14 @@ define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 12
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 12
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %in2, %temp
- %2 = and i32 %in, 2
- %cmp = icmp eq i32 %2, 0
+ %and1 = and i32 %in, 2
+ %cmp = icmp eq i32 %and1, 0
%temp2 = select i1 %cmp, i32 0, i32 144
%temp4 = or disjoint i32 %temp2, %in3
%out = or disjoint i32 %temp3, %temp4
@@ -544,15 +544,15 @@ define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 2
- %cmp = icmp eq i32 %1, 0
+ %and0 = and i32 %in, 2
+ %cmp = icmp eq i32 %and0, 0
%temp = select i1 %cmp, i32 0, i32 144
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 4
- %cmp2 = icmp eq i32 %2, 0
+ %and1 = and i32 %in, 4
+ %cmp2 = icmp eq i32 %and1, 0
%temp2 = select i1 %cmp2, i32 0, i32 288
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
@@ -564,14 +564,14 @@ define i32 @unrelated_ops6(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %in3, %temp
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -582,14 +582,14 @@ define i32 @unrelated_ops7(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %in3, %temp
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %temp2, %in2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -600,14 +600,14 @@ define i32 @unrelated_ops8(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %temp2, %in2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -624,11 +624,11 @@ define i32 @unrelated_ops_nocombine(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 7
- %temp2 = mul nuw nsw i32 %2, 72
+ %and1 = and i32 %in, 7
+ %temp2 = mul nuw nsw i32 %and1, 72
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -645,11 +645,11 @@ define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %temp, %in3
- %2 = and i32 %in, 12
- %temp2 = mul nuw nsw i32 %2, 36
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 36
%temp4 = or disjoint i32 %in2, %temp2
%out = or disjoint i32 %temp3, %temp4
ret i32 %out
@@ -663,8 +663,8 @@ define i32 @no_chain(i32 %in, i32 %in2, i32 %in3) {
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[IN2:%.*]]
; CHECK-NEXT: ret i32 [[OUT]]
;
- %1 = and i32 %in, 3
- %temp = mul nuw nsw i32 %1, 72
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
%temp3 = or disjoint i32 %temp, %in3
%out = or disjoint i32 %temp3, %in2
ret i32 %out
>From 8587a7cf1ffc5f796abd490fe8838f00f0918103 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 1 Jul 2025 12:04:24 -0700
Subject: [PATCH 10/12] Remove aggressive reassociation
Change-Id: I72b20a2695eb8de9ac24578729e77c0f3b3cd8aa
---
.../InstCombine/InstCombineAndOrXor.cpp | 24 -------
.../test/Transforms/InstCombine/or-bitmask.ll | 67 +++++++++++++------
2 files changed, 46 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 78ab22dc4c886..17bd83d589faa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3720,30 +3720,6 @@ Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) {
return Builder.CreateOr(Res, X, "", /*IsDisjoint=*/true);
}
- Value *X1, *Y1;
- if (match(LHS, m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y)))) &&
- (match(RHS, m_OneUse(m_DisjointOr(m_Value(X1), m_Value(Y1)))))) {
- auto TryFold = [this](Value *Op0, Value *Op1, Value *Rem0,
- Value *Rem1) -> Value * {
- if (Value *Res = foldDisjointOr(Op0, Op1)) {
- auto Disjoint = Builder.CreateOr(Rem0, Rem1, "", /*IsDisjoint=*/true);
- return Builder.CreateOr(Disjoint, Res, "", /*IsDisjoint=*/true);
- }
- return nullptr;
- };
-
- if (Value *Res = TryFold(X, X1, Y, Y1))
- return Res;
-
- if (Value *Res = TryFold(X, Y1, Y, X1))
- return Res;
-
- if (Value *Res = TryFold(Y, X1, X, Y1))
- return Res;
-
- if (Value *Res = TryFold(Y, Y1, X, X1))
- return Res;
- }
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index a1fb5d82c6915..b10aff96d0a7f 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -485,10 +485,13 @@ define i32 @unrelated_ops1(i32 %in, i32 %in2) {
define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops2(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TMP2]], [[IN4:%.*]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[IN2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -503,10 +506,14 @@ define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops3(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND0]], 0
+; CHECK-NEXT: [[TEMP:%.*]] = select i1 [[CMP]], i32 0, i32 144
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TEMP]], [[IN3:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TEMP2]]
+; CHECK-NEXT: [[TEMP4:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP4]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 2
@@ -522,10 +529,14 @@ define i32 @unrelated_ops3(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops4(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 12
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN4:%.*]], [[TMP3]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND1]], 0
+; CHECK-NEXT: [[IN3:%.*]] = select i1 [[CMP]], i32 0, i32 144
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TMP4]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 12
@@ -541,9 +552,14 @@ define i32 @unrelated_ops4(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops5(
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 6
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND0]], 0
+; CHECK-NEXT: [[IN3:%.*]] = select i1 [[CMP]], i32 0, i32 144
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[IN3]], [[IN2:%.*]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[AND1]], 0
+; CHECK-NEXT: [[TEMP2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN4:%.*]], [[TEMP2]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
@@ -561,10 +577,13 @@ define i32 @unrelated_ops5(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops6(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops6(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN4:%.*]], [[TMP2]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[IN2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -579,10 +598,13 @@ define i32 @unrelated_ops6(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops7(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops7(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN4:%.*]], [[TMP2]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[IN3:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -597,10 +619,13 @@ define i32 @unrelated_ops7(i32 %in, i32 %in2, i32 %in3) {
define i32 @unrelated_ops8(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @unrelated_ops8(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[TMP2]], [[IN4:%.*]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[IN3:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
>From c1781401e915ede4d4787f9f76e88c16b5ac103e Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Sun, 6 Jul 2025 10:40:47 -0700
Subject: [PATCH 11/12] Review Comments
Change-Id: I24f6316b71fffaadac0e1cd4a9158d985f184664
---
.../InstCombine/InstCombineAndOrXor.cpp | 36 ++--
.../test/Transforms/InstCombine/or-bitmask.ll | 196 ++++++++++++++++++
2 files changed, 211 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 17bd83d589faa..706cb828acc63 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2377,7 +2377,6 @@ Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y,
if (Value *Res = foldBooleanAndOr(LHS, Y, I, IsAnd, /*IsLogical=*/false))
return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res)
: Builder.CreateBinOp(Opcode, X, Res);
-
return nullptr;
}
@@ -3604,8 +3603,8 @@ struct DecomposedBitMaskMul {
bool NUW;
bool NSW;
- bool isCombineableWith(DecomposedBitMaskMul Other) {
- return X == Other.X && (Mask & Other.Mask).isZero() &&
+ bool isCombineableWith(const DecomposedBitMaskMul Other) {
+ return X == Other.X && !Mask.intersects(Other.Mask) &&
Factor == Other.Factor;
}
};
@@ -3671,34 +3670,29 @@ static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
static Value *foldBitmaskMul(Value *Op0, Value *Op1,
InstCombiner::BuilderTy &Builder) {
auto Decomp1 = matchBitmaskMul(Op1);
+ if (!Decomp1)
+ return nullptr;
- if (Decomp1) {
- auto Decomp0 = matchBitmaskMul(Op0);
-
- if (Decomp0) {
- // If we have independent operands in the BitmaskMul chain, then just
- // reassociate to encourage combining in future iterations.
+ auto Decomp0 = matchBitmaskMul(Op0);
+ if (!Decomp0)
+ return nullptr;
- if (Decomp0->isCombineableWith(*Decomp1)) {
- auto NewAnd = Builder.CreateAnd(
- Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
- (Decomp0->Mask + Decomp1->Mask)));
+ if (Decomp0->isCombineableWith(*Decomp1)) {
+ Value *NewAnd = Builder.CreateAnd(
+ Decomp0->X,
+ ConstantInt::get(Decomp0->X->getType(), Decomp0->Mask + Decomp1->Mask));
- auto Res = Builder.CreateMul(
- NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor), "",
- Decomp0->NUW && Decomp1->NUW, Decomp0->NSW && Decomp1->NSW);
- return Res;
- }
- }
+ return Builder.CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor), "",
+ Decomp0->NUW && Decomp1->NUW, Decomp0->NSW && Decomp1->NSW);
}
return nullptr;
}
Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS) {
- if (Value *Res = foldBitmaskMul(LHS, RHS, Builder)) {
+ if (Value *Res = foldBitmaskMul(LHS, RHS, Builder))
return Res;
- }
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index b10aff96d0a7f..bc2e33dc9c111 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -680,6 +680,202 @@ define i32 @unrelated_ops_nocombine1(i32 %in, i32 %in2, i32 %in3) {
ret i32 %out
}
+define i32 @unrelated_nondisjoint(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_nondisjoint(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP]]
+; CHECK-NEXT: [[OUT:%.*]] = or i32 [[TEMP3]], [[TEMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or i32 %temp3, %temp2
+ ret i32 %out
+}
+
+define i32 @unrelated_nondisjoint1(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_nondisjoint1(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TEMP3:%.*]] = or i32 [[IN2:%.*]], [[TEMP]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP2]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ ret i32 %out
+}
+
+define i32 @multi_use(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %out)
+ ret i32 %out
+}
+
+define i32 @multi_use1(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use1(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP2]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %temp3)
+ ret i32 %out
+}
+
+define i32 @multi_use2(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use2(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP2]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %temp3)
+ ret i32 %out
+}
+
+define i32 @multi_use3(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use3(
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN:%.*]], 12
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %temp2)
+ ret i32 %out
+}
+
+define i32 @multi_use4(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use4(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %temp)
+ ret i32 %out
+}
+
+define i32 @multi_use5(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use5(
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[IN:%.*]], 12
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %and1)
+ ret i32 %out
+}
+
+define i32 @multi_use6(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use6(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[AND0]], 72
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %temp)
+ ret i32 %out
+}
+
+define i32 @multi_use7(i32 %in, i32 %in2) {
+; CHECK-LABEL: @multi_use7(
+; CHECK-NEXT: [[AND0:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %and0 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %and0, 72
+ %and1 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %and1, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ call void asm sideeffect "; use $0", "{}"(i32 %and0)
+ ret i32 %out
+}
+
define i32 @no_chain(i32 %in, i32 %in2, i32 %in3) {
; CHECK-LABEL: @no_chain(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
>From bd73d37ddc698ebf23c60b7b0e4fa13e5125d106 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 7 Jul 2025 08:44:44 -0700
Subject: [PATCH 12/12] Review comments
Change-Id: I296804c2599419d4121ae4c88724ef09ddb5b4fd
---
.../test/Transforms/InstCombine/or-bitmask.ll | 34 ++++++++++---------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index bc2e33dc9c111..2ee34cdc034ad 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -718,12 +718,14 @@ define i32 @unrelated_nondisjoint1(i32 %in, i32 %in2) {
ret i32 %out
}
+declare void @use(i32)
+
define i32 @multi_use(i32 %in, i32 %in2) {
; CHECK-LABEL: @multi_use(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[OUT]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -732,7 +734,7 @@ define i32 @multi_use(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %out)
+ call void @use(i32 %out)
ret i32 %out
}
@@ -744,7 +746,7 @@ define i32 @multi_use1(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP2]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[TEMP3]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -753,7 +755,7 @@ define i32 @multi_use1(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %temp3)
+ call void @use(i32 %temp3)
ret i32 %out
}
@@ -765,7 +767,7 @@ define i32 @multi_use2(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[AND1]], 72
; CHECK-NEXT: [[TEMP3:%.*]] = or disjoint i32 [[IN2:%.*]], [[TEMP]]
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP3]], [[TEMP2]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[TEMP3]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -774,7 +776,7 @@ define i32 @multi_use2(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %temp3)
+ call void @use(i32 %temp3)
ret i32 %out
}
@@ -785,7 +787,7 @@ define i32 @multi_use3(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[TEMP2]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -794,7 +796,7 @@ define i32 @multi_use3(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %temp2)
+ call void @use(i32 %temp2)
ret i32 %out
}
@@ -805,7 +807,7 @@ define i32 @multi_use4(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[TEMP]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -814,7 +816,7 @@ define i32 @multi_use4(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %temp)
+ call void @use(i32 %temp)
ret i32 %out
}
@@ -824,7 +826,7 @@ define i32 @multi_use5(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[AND1]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -833,7 +835,7 @@ define i32 @multi_use5(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %and1)
+ call void @use(i32 %and1)
ret i32 %out
}
@@ -844,7 +846,7 @@ define i32 @multi_use6(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[TEMP]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -853,7 +855,7 @@ define i32 @multi_use6(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %temp)
+ call void @use(i32 %temp)
ret i32 %out
}
@@ -863,7 +865,7 @@ define i32 @multi_use7(i32 %in, i32 %in2) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN]], 15
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
-; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use(i32 [[AND0]])
; CHECK-NEXT: ret i32 [[OUT]]
;
%and0 = and i32 %in, 3
@@ -872,7 +874,7 @@ define i32 @multi_use7(i32 %in, i32 %in2) {
%temp2 = mul nuw nsw i32 %and1, 72
%temp3 = or disjoint i32 %in2, %temp
%out = or disjoint i32 %temp3, %temp2
- call void asm sideeffect "; use $0", "{}"(i32 %and0)
+ call void @use(i32 %and0)
ret i32 %out
}
More information about the llvm-commits mailing list