[llvm] [SelectionDAG] Make `(a & x) | (~a & y) -> (a & (x ^ y)) ^ y` available for all targets (PR #137641)
Iris Shi via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 07:44:50 PDT 2025
https://github.com/el-ev created https://github.com/llvm/llvm-project/pull/137641
Closes #83637.
By the way, based on the tests, the existing folding on the X86 target doesn't seem to be working.
>From f800576a7b0cd848d093bdc25cde7273421868cb Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Mon, 28 Apr 2025 22:30:56 +0800
Subject: [PATCH 1/2] pre-commit test
---
llvm/test/CodeGen/RISCV/fold-masked-merge.ll | 310 +++++++++++++++++++
1 file changed, 310 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/fold-masked-merge.ll
diff --git a/llvm/test/CodeGen/RISCV/fold-masked-merge.ll b/llvm/test/CodeGen/RISCV/fold-masked-merge.ll
new file mode 100644
index 0000000000000..9987ea6e8352c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fold-masked-merge.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV32,RV32I
+; RUN: llc -mtriple=riscv64 < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV32,RV32ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
+;
+; test that masked-merge code is generated as a "xor; and; xor" sequence,
+; or as "andn; and; or" if and-not is available.
+
+define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-I-LABEL: masked_merge0:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge0:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: andn a0, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ ret i32 %or
+}
+
+define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
+; CHECK-I-LABEL: masked_merge1:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: and a0, a2, a0
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge1:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: andn a0, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i16 %a0, %a1
+ %not = xor i16 %a0, -1
+ %and1 = and i16 %a2, %not
+ %or = or i16 %and0, %and1
+ ret i16 %or
+}
+
+define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
+; CHECK-I-LABEL: masked_merge2:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: not a2, a0
+; CHECK-I-NEXT: and a2, a2, a1
+; CHECK-I-NEXT: and a0, a1, a0
+; CHECK-I-NEXT: or a0, a2, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge2:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: andn a2, a1, a0
+; CHECK-ZBB-NEXT: and a0, a1, a0
+; CHECK-ZBB-NEXT: or a0, a2, a0
+; CHECK-ZBB-NEXT: ret
+ %not = xor i8 %a0, -1
+ %and0 = and i8 %not, %a1
+ %and1 = and i8 %a1, %a0
+ %or = or i8 %and0, %and1
+ ret i8 %or
+}
+
+define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
+; RV32I-LABEL: masked_merge3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a4, a4
+; RV32I-NEXT: not a5, a5
+; RV32I-NEXT: not a6, a0
+; RV32I-NEXT: not a7, a1
+; RV32I-NEXT: and a5, a7, a5
+; RV32I-NEXT: and a4, a6, a4
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: masked_merge3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: not a3, a0
+; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: masked_merge3:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: not a6, a0
+; RV32ZBB-NEXT: not a7, a1
+; RV32ZBB-NEXT: andn a1, a1, a3
+; RV32ZBB-NEXT: andn a0, a0, a2
+; RV32ZBB-NEXT: andn a2, a7, a5
+; RV32ZBB-NEXT: andn a3, a6, a4
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: masked_merge3:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: not a3, a0
+; RV64ZBB-NEXT: andn a2, a3, a2
+; RV64ZBB-NEXT: andn a0, a0, a1
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: ret
+ %v0 = xor i64 %a1, -1
+ %v1 = xor i64 %a2, -1
+ %not = xor i64 %a0, -1
+ %and0 = and i64 %not, %v1
+ %and1 = and i64 %v0, %a0
+ %or = or i64 %and0, %and1
+ ret i64 %or
+}
+
+define i32 @not_a_masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
+; RV32-LABEL: not_a_masked_merge0:
+; RV32: # %bb.0:
+; RV32-NEXT: and a1, a0, a1
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: not_a_masked_merge0:
+; RV64: # %bb.0:
+; RV64-NEXT: and a1, a0, a1
+; RV64-NEXT: negw a0, a0
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not_a_not = sub i32 0, %a0
+ %and1 = and i32 %not_a_not, %a2
+ %or = or i32 %and0, %and1
+ ret i32 %or
+}
+
+define i32 @not_a_masked_merge1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-I-LABEL: not_a_masked_merge1:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a0, a0, a1
+; CHECK-I-NEXT: not a1, a3
+; CHECK-I-NEXT: and a1, a1, a2
+; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: not_a_masked_merge1:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a0, a0, a1
+; CHECK-ZBB-NEXT: andn a1, a2, a3
+; CHECK-ZBB-NEXT: or a0, a0, a1
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a3, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ ret i32 %or
+}
+
+define i32 @not_a_masked_merge2(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-I-LABEL: not_a_masked_merge2:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: or a1, a0, a1
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: not_a_masked_merge2:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: or a1, a0, a1
+; CHECK-ZBB-NEXT: andn a0, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: ret
+ %not_an_and0 = or i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %not_an_and0, %and1
+ ret i32 %or
+}
+
+define i32 @not_a_masked_merge3(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-I-LABEL: not_a_masked_merge3:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: xor a0, a0, a2
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: not_a_masked_merge3:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: xor a0, a0, a2
+; CHECK-ZBB-NEXT: orn a0, a1, a0
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %not_an_and1 = xor i32 %not, %a2
+ %or = or i32 %and0, %not_an_and1
+ ret i32 %or
+}
+
+define i32 @not_a_masked_merge4(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: not_a_masked_merge4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a2, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ ret i32 %or
+}
+
+define i32 @masked_merge_no_transform0(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; CHECK-I-LABEL: masked_merge_no_transform0:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: sw a1, 0(a3)
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge_no_transform0:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: andn a0, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: sw a1, 0(a3)
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ store i32 %and0, ptr %p1
+ ret i32 %or
+}
+
+define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; CHECK-I-LABEL: masked_merge_no_transform1:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: not a4, a0
+; CHECK-I-NEXT: and a0, a4, a2
+; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: sw a4, 0(a3)
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge_no_transform1:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: not a4, a0
+; CHECK-ZBB-NEXT: andn a0, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a0
+; CHECK-ZBB-NEXT: sw a4, 0(a3)
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ store i32 %not, ptr %p1
+ ret i32 %or
+}
+
+define i32 @masked_merge_no_transform2(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
+; CHECK-I-LABEL: masked_merge_no_transform2:
+; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: and a1, a0, a1
+; CHECK-I-NEXT: not a0, a0
+; CHECK-I-NEXT: and a2, a0, a2
+; CHECK-I-NEXT: or a0, a1, a2
+; CHECK-I-NEXT: sw a2, 0(a3)
+; CHECK-I-NEXT: ret
+;
+; CHECK-ZBB-LABEL: masked_merge_no_transform2:
+; CHECK-ZBB: # %bb.0:
+; CHECK-ZBB-NEXT: and a1, a0, a1
+; CHECK-ZBB-NEXT: andn a2, a2, a0
+; CHECK-ZBB-NEXT: or a0, a1, a2
+; CHECK-ZBB-NEXT: sw a2, 0(a3)
+; CHECK-ZBB-NEXT: ret
+ %and0 = and i32 %a0, %a1
+ %not = xor i32 %a0, -1
+ %and1 = and i32 %not, %a2
+ %or = or i32 %and0, %and1
+ store i32 %and1, ptr %p1
+ ret i32 %or
+}
>From 45913033e40dd30b4cf1cbaf3792573ef3cf4347 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Mon, 28 Apr 2025 22:39:24 +0800
Subject: [PATCH 2/2] [SelectionDAG] Make `(a & x) | (~a & y) -> (a & (x ^ y))
^ y` available for all targets
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 55 +++++++++++++
llvm/lib/Target/X86/X86ISelLowering.cpp | 58 -------------
llvm/test/CodeGen/RISCV/fold-masked-merge.ll | 42 ++++------
...unfold-masked-merge-scalar-variablemask.ll | 62 ++++++--------
llvm/test/CodeGen/X86/bitselect.ll | 76 +++++++++--------
llvm/test/CodeGen/X86/fold-masked-merge.ll | 30 +++----
...unfold-masked-merge-scalar-variablemask.ll | 26 +++---
...unfold-masked-merge-vector-variablemask.ll | 82 ++++++++-----------
8 files changed, 194 insertions(+), 237 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 282dc4470238d..7a52acb31d2ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8108,6 +8108,57 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
+ SDValue AndR1, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ if (!isBitwiseNot(AndL0, true) || !AndL0->hasOneUse())
+ return SDValue();
+ SDValue NotOp = AndL0->getOperand(0);
+ if (NotOp == AndR1)
+ std::swap(AndR1, AndL1);
+ if (NotOp != AndL1)
+ return SDValue();
+
+  // (~NotOp & AndR0) | (NotOp & AndR1)
+  // --> ((AndR0 ^ AndR1) & NotOp) ^ AndR0
+ EVT VT = AndL1->getValueType(0);
+ SDValue FreezeAndR0 = DAG.getNode(ISD::FREEZE, SDLoc(), VT, AndR0);
+ SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, FreezeAndR0);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+ SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, FreezeAndR0);
+ return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ // Note that masked-merge variants using XOR or ADD expressions are
+ // normalized to OR by InstCombine so we only check for OR.
+ assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+ return SDValue();
+ SDValue N1 = Node->getOperand(1);
+ if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+ return SDValue();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+ if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+ return Result;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8286,6 +8337,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
return R;
+ if (!TLI.hasAndNot(SDValue(N, 0)) && VT.isScalarInteger() && VT != MVT::i1)
+ if (SDValue R = foldMaskedMerge(N, DAG, DL))
+ return R;
+
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b07843523a15b..4f01345f73f94 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52005,59 +52005,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
}
-static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
- SDValue And1_L, SDValue And1_R,
- const SDLoc &DL, SelectionDAG &DAG) {
- if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
- return SDValue();
- SDValue NotOp = And0_L->getOperand(0);
- if (NotOp == And1_R)
- std::swap(And1_R, And1_L);
- if (NotOp != And1_L)
- return SDValue();
-
- // (~(NotOp) & And0_R) | (NotOp & And1_R)
- // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
- EVT VT = And1_L->getValueType(0);
- SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
- SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
- SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
- SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
- return Xor1;
-}
-
-/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
-/// equivalent `((x ^ y) & m) ^ y)` pattern.
-/// This is typically a better representation for targets without a fused
-/// "and-not" operation. This function is intended to be called from a
-/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
-static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
- // Note that masked-merge variants using XOR or ADD expressions are
- // normalized to OR by InstCombine so we only check for OR.
- assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
- SDValue N0 = Node->getOperand(0);
- if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
- return SDValue();
- SDValue N1 = Node->getOperand(1);
- if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
- return SDValue();
-
- SDLoc DL(Node);
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
- SDValue N10 = N1->getOperand(0);
- SDValue N11 = N1->getOperand(1);
- if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
- return Result;
- if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
- return Result;
- if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
- return Result;
- if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
- return Result;
- return SDValue();
-}
-
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
@@ -52461,11 +52408,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
- // We should fold "masked merge" patterns when `andn` is not available.
- if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
- if (SDValue R = foldMaskedMerge(N, DAG))
- return R;
-
if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
return R;
diff --git a/llvm/test/CodeGen/RISCV/fold-masked-merge.ll b/llvm/test/CodeGen/RISCV/fold-masked-merge.ll
index 9987ea6e8352c..631b7109281e5 100644
--- a/llvm/test/CodeGen/RISCV/fold-masked-merge.ll
+++ b/llvm/test/CodeGen/RISCV/fold-masked-merge.ll
@@ -14,10 +14,9 @@
define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-I-LABEL: masked_merge0:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: and a1, a0, a1
-; CHECK-I-NEXT: not a0, a0
-; CHECK-I-NEXT: and a0, a0, a2
-; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: xor a1, a1, a2
+; CHECK-I-NEXT: and a0, a1, a0
+; CHECK-I-NEXT: xor a0, a0, a2
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: masked_merge0:
@@ -36,10 +35,9 @@ define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
; CHECK-I-LABEL: masked_merge1:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: and a1, a0, a1
-; CHECK-I-NEXT: not a0, a0
-; CHECK-I-NEXT: and a0, a2, a0
-; CHECK-I-NEXT: or a0, a1, a0
+; CHECK-I-NEXT: xor a1, a1, a2
+; CHECK-I-NEXT: and a0, a1, a0
+; CHECK-I-NEXT: xor a0, a0, a2
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: masked_merge1:
@@ -58,10 +56,7 @@ define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
; CHECK-I-LABEL: masked_merge2:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: not a2, a0
-; CHECK-I-NEXT: and a2, a2, a1
-; CHECK-I-NEXT: and a0, a1, a0
-; CHECK-I-NEXT: or a0, a2, a0
+; CHECK-I-NEXT: mv a0, a1
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: masked_merge2:
@@ -80,28 +75,25 @@ define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
; RV32I-LABEL: masked_merge3:
; RV32I: # %bb.0:
+; RV32I-NEXT: not a5, a5
+; RV32I-NEXT: not a4, a4
+; RV32I-NEXT: xor a3, a3, a5
+; RV32I-NEXT: xor a2, a2, a4
; RV32I-NEXT: not a2, a2
; RV32I-NEXT: not a3, a3
-; RV32I-NEXT: not a4, a4
-; RV32I-NEXT: not a5, a5
-; RV32I-NEXT: not a6, a0
-; RV32I-NEXT: not a7, a1
-; RV32I-NEXT: and a5, a7, a5
-; RV32I-NEXT: and a4, a6, a4
-; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: and a0, a2, a0
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: xor a0, a0, a4
+; RV32I-NEXT: xor a1, a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: masked_merge3:
; RV64I: # %bb.0:
-; RV64I-NEXT: not a1, a1
; RV64I-NEXT: not a2, a2
-; RV64I-NEXT: not a3, a0
-; RV64I-NEXT: and a2, a3, a2
+; RV64I-NEXT: xor a1, a1, a2
+; RV64I-NEXT: not a1, a1
; RV64I-NEXT: and a0, a1, a0
-; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: masked_merge3:
diff --git a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
index 1517e524a7f78..efc8243df71e0 100644
--- a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
@@ -8,16 +8,13 @@
; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
-; TODO: Should we convert these to X ^ ((X ^ Y) & M) form when Zbb isn't
-; present?
define i8 @out8(i8 %x, i8 %y, i8 %mask) {
; CHECK-I-LABEL: out8:
; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: and a0, a0, a2
-; CHECK-I-NEXT: not a2, a2
-; CHECK-I-NEXT: and a1, a1, a2
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out8:
@@ -36,10 +33,9 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
define i16 @out16(i16 %x, i16 %y, i16 %mask) {
; CHECK-I-LABEL: out16:
; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: and a0, a0, a2
-; CHECK-I-NEXT: not a2, a2
-; CHECK-I-NEXT: and a1, a1, a2
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out16:
@@ -58,10 +54,9 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) {
define i32 @out32(i32 %x, i32 %y, i32 %mask) {
; CHECK-I-LABEL: out32:
; CHECK-I: # %bb.0:
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: and a0, a0, a2
-; CHECK-I-NEXT: not a2, a2
-; CHECK-I-NEXT: and a1, a1, a2
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out32:
@@ -80,22 +75,19 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) {
define i64 @out64(i64 %x, i64 %y, i64 %mask) {
; RV32I-LABEL: out64:
; RV32I: # %bb.0:
-; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a1, a1, a3
; RV32I-NEXT: and a0, a0, a4
-; RV32I-NEXT: not a4, a4
-; RV32I-NEXT: not a5, a5
-; RV32I-NEXT: and a3, a3, a5
-; RV32I-NEXT: and a2, a2, a4
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a1, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: out64:
; RV64I: # %bb.0:
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: not a2, a2
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: out64:
@@ -660,10 +652,9 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
; CHECK-I-LABEL: out_constant_varx_42:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: not a1, a2
-; CHECK-I-NEXT: and a0, a2, a0
-; CHECK-I-NEXT: andi a1, a1, 42
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xori a0, a0, 42
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: xori a0, a0, 42
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out_constant_varx_42:
@@ -704,10 +695,9 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-I-LABEL: out_constant_varx_42_invmask:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: not a1, a2
-; CHECK-I-NEXT: and a0, a1, a0
-; CHECK-I-NEXT: andi a1, a2, 42
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xori a1, a0, 42
+; CHECK-I-NEXT: and a1, a1, a2
+; CHECK-I-NEXT: xor a0, a1, a0
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out_constant_varx_42_invmask:
@@ -812,10 +802,9 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
; CHECK-I-LABEL: out_constant_42_vary:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: not a0, a2
-; CHECK-I-NEXT: andi a2, a2, 42
-; CHECK-I-NEXT: and a0, a0, a1
-; CHECK-I-NEXT: or a0, a2, a0
+; CHECK-I-NEXT: xori a0, a1, 42
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: xor a0, a0, a1
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out_constant_42_vary:
@@ -855,10 +844,9 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-I-LABEL: out_constant_42_vary_invmask:
; CHECK-I: # %bb.0:
-; CHECK-I-NEXT: not a0, a2
-; CHECK-I-NEXT: andi a0, a0, 42
-; CHECK-I-NEXT: and a1, a2, a1
-; CHECK-I-NEXT: or a0, a0, a1
+; CHECK-I-NEXT: xori a0, a1, 42
+; CHECK-I-NEXT: and a0, a0, a2
+; CHECK-I-NEXT: xori a0, a0, 42
; CHECK-I-NEXT: ret
;
; CHECK-ZBB-LABEL: out_constant_42_vary_invmask:
diff --git a/llvm/test/CodeGen/X86/bitselect.ll b/llvm/test/CodeGen/X86/bitselect.ll
index 2922113b14ea9..48733b206d446 100644
--- a/llvm/test/CodeGen/X86/bitselect.ll
+++ b/llvm/test/CodeGen/X86/bitselect.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOBMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-bmi | FileCheck %s --check-prefixes=X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64-BMI
; PR46472
; bitselect(a,b,m) == or(and(a,not(m)),and(b,m))
@@ -17,14 +17,22 @@ define i8 @bitselect_i8(i8 %a, i8 %b, i8 %m) nounwind {
; X86-NEXT: xorb %cl, %al
; X86-NEXT: retl
;
-; X64-LABEL: bitselect_i8:
-; X64: # %bb.0:
-; X64-NEXT: andl %edx, %esi
-; X64-NEXT: movl %edx, %eax
-; X64-NEXT: notb %al
-; X64-NEXT: andb %dil, %al
-; X64-NEXT: orb %sil, %al
-; X64-NEXT: retq
+; X64-NOBMI-LABEL: bitselect_i8:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %edx, %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: # kill: def $al killed $al killed $eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: bitselect_i8:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: andnl %edi, %edx, %eax
+; X64-BMI-NEXT: andl %edx, %esi
+; X64-BMI-NEXT: orl %esi, %eax
+; X64-BMI-NEXT: # kill: def $al killed $al killed $eax
+; X64-BMI-NEXT: retq
%not = xor i8 %m, -1
%ma = and i8 %a, %not
%mb = and i8 %b, %m
@@ -45,11 +53,10 @@ define i16 @bitselect_i16(i16 %a, i16 %b, i16 %m) nounwind {
;
; X64-NOBMI-LABEL: bitselect_i16:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl %edx, %eax
-; X64-NOBMI-NEXT: andl %edx, %esi
-; X64-NOBMI-NEXT: notl %eax
-; X64-NOBMI-NEXT: andl %edi, %eax
-; X64-NOBMI-NEXT: orl %esi, %eax
+; X64-NOBMI-NEXT: movl %esi, %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %edx, %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI-NEXT: retq
;
@@ -101,18 +108,16 @@ define i32 @bitselect_i32(i32 %a, i32 %b, i32 %m) nounwind {
define i64 @bitselect_i64(i64 %a, i64 %b, i64 %m) nounwind {
; X86-LABEL: bitselect_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl %edx, %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %esi, %edx
-; X86-NEXT: popl %esi
+; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: retl
;
; X64-NOBMI-LABEL: bitselect_i64:
@@ -142,15 +147,15 @@ define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %edi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %ebx, %edi
; X86-NEXT: andl {{[0-9]+}}(%esp), %edi
@@ -160,13 +165,13 @@ define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
; X86-NEXT: andl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -186,13 +191,12 @@ define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
;
; X64-BMI-LABEL: bitselect_i128:
; X64-BMI: # %bb.0:
-; X64-BMI-NEXT: andnq %rsi, %r9, %rsi
; X64-BMI-NEXT: andnq %rdi, %r8, %rax
-; X64-BMI-NEXT: andq %r9, %rcx
-; X64-BMI-NEXT: orq %rcx, %rsi
; X64-BMI-NEXT: andq %r8, %rdx
; X64-BMI-NEXT: orq %rdx, %rax
-; X64-BMI-NEXT: movq %rsi, %rdx
+; X64-BMI-NEXT: andnq %rsi, %r9, %rdx
+; X64-BMI-NEXT: andq %r9, %rcx
+; X64-BMI-NEXT: orq %rcx, %rdx
; X64-BMI-NEXT: retq
%not = xor i128 %m, -1
%ma = and i128 %a, %not
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge.ll b/llvm/test/CodeGen/X86/fold-masked-merge.ll
index b2614c5fe0493..4a4eecbdfb3f3 100644
--- a/llvm/test/CodeGen/X86/fold-masked-merge.ll
+++ b/llvm/test/CodeGen/X86/fold-masked-merge.ll
@@ -30,18 +30,17 @@ define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
; NOBMI-LABEL: masked_merge1:
; NOBMI: # %bb.0:
-; NOBMI-NEXT: movl %edi, %eax
-; NOBMI-NEXT: andl %edi, %esi
-; NOBMI-NEXT: notl %eax
-; NOBMI-NEXT: andl %edx, %eax
-; NOBMI-NEXT: orl %esi, %eax
+; NOBMI-NEXT: movl %esi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
+; NOBMI-NEXT: andl %edi, %eax
+; NOBMI-NEXT: xorl %edx, %eax
; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
; NOBMI-NEXT: retq
;
; BMI-LABEL: masked_merge1:
; BMI: # %bb.0:
-; BMI-NEXT: andl %edi, %esi
; BMI-NEXT: andnl %edx, %edi, %eax
+; BMI-NEXT: andl %edi, %esi
; BMI-NEXT: orl %esi, %eax
; BMI-NEXT: # kill: def $ax killed $ax killed $eax
; BMI-NEXT: retq
@@ -53,20 +52,11 @@ define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
}
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
-; NOBMI-LABEL: masked_merge2:
-; NOBMI: # %bb.0:
-; NOBMI-NEXT: movl %esi, %eax
-; NOBMI-NEXT: # kill: def $al killed $al killed $eax
-; NOBMI-NEXT: retq
-;
-; BMI-LABEL: masked_merge2:
-; BMI: # %bb.0:
-; BMI-NEXT: movl %edi, %eax
-; BMI-NEXT: notb %al
-; BMI-NEXT: andb %sil, %al
-; BMI-NEXT: andb %dil, %sil
-; BMI-NEXT: orb %sil, %al
-; BMI-NEXT: retq
+; CHECK-LABEL: masked_merge2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
%not = xor i8 %a0, -1
%and0 = and i8 %not, %a1
%and1 = and i8 %a1, %a0
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
index 9c9d06921096c..6a55d740fe421 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
@@ -6,21 +6,18 @@
define i8 @out8(i8 %x, i8 %y, i8 %mask) {
; CHECK-NOBMI-LABEL: out8:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movl %edx, %eax
-; CHECK-NOBMI-NEXT: andl %edx, %edi
-; CHECK-NOBMI-NEXT: notb %al
-; CHECK-NOBMI-NEXT: andb %sil, %al
-; CHECK-NOBMI-NEXT: orb %dil, %al
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: andl %edx, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out8:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: movl %edx, %eax
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: notb %al
-; CHECK-BMI-NEXT: andb %sil, %al
-; CHECK-BMI-NEXT: orb %dil, %al
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
; CHECK-BMI-NEXT: retq
%mx = and i8 %x, %mask
@@ -33,18 +30,17 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
define i16 @out16(i16 %x, i16 %y, i16 %mask) {
; CHECK-NOBMI-LABEL: out16:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movl %edx, %eax
-; CHECK-NOBMI-NEXT: andl %edx, %edi
-; CHECK-NOBMI-NEXT: notl %eax
-; CHECK-NOBMI-NEXT: andl %esi, %eax
-; CHECK-NOBMI-NEXT: orl %edi, %eax
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: andl %edx, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out16:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: andl %edx, %edi
; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: andl %edx, %edi
; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-BMI-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
index b1194bedc4e1c..f2cd17b807465 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
@@ -16,11 +16,10 @@
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: andl %edx, %edi
-; CHECK-NEXT: notb %al
-; CHECK-NEXT: andb %sil, %al
-; CHECK-NEXT: orb %dil, %al
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: andl %edx, %eax
+; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%mx = and <1 x i8> %x, %mask
@@ -37,32 +36,28 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i8:
; CHECK-BASELINE: # %bb.0:
-; CHECK-BASELINE-NEXT: movl %r8d, %eax
+; CHECK-BASELINE-NEXT: movl %edi, %eax
+; CHECK-BASELINE-NEXT: xorl %edx, %eax
+; CHECK-BASELINE-NEXT: andl %r8d, %eax
+; CHECK-BASELINE-NEXT: xorl %edx, %eax
+; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: andl %r9d, %esi
-; CHECK-BASELINE-NEXT: andl %r8d, %edi
-; CHECK-BASELINE-NEXT: notb %al
-; CHECK-BASELINE-NEXT: notb %r9b
-; CHECK-BASELINE-NEXT: andb %cl, %r9b
-; CHECK-BASELINE-NEXT: andb %dl, %al
-; CHECK-BASELINE-NEXT: orb %dil, %al
-; CHECK-BASELINE-NEXT: orb %sil, %r9b
+; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax
-; CHECK-BASELINE-NEXT: movl %r9d, %edx
+; CHECK-BASELINE-NEXT: movl %esi, %edx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: out_v2i8:
; CHECK-SSE1: # %bb.0:
-; CHECK-SSE1-NEXT: movl %r8d, %eax
+; CHECK-SSE1-NEXT: movl %edi, %eax
+; CHECK-SSE1-NEXT: xorl %edx, %eax
+; CHECK-SSE1-NEXT: andl %r8d, %eax
+; CHECK-SSE1-NEXT: xorl %edx, %eax
+; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: andl %r9d, %esi
-; CHECK-SSE1-NEXT: andl %r8d, %edi
-; CHECK-SSE1-NEXT: notb %al
-; CHECK-SSE1-NEXT: notb %r9b
-; CHECK-SSE1-NEXT: andb %cl, %r9b
-; CHECK-SSE1-NEXT: andb %dl, %al
-; CHECK-SSE1-NEXT: orb %dil, %al
-; CHECK-SSE1-NEXT: orb %sil, %r9b
+; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax
-; CHECK-SSE1-NEXT: movl %r9d, %edx
+; CHECK-SSE1-NEXT: movl %esi, %edx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_v2i8:
@@ -86,11 +81,10 @@ define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: andl %edx, %edi
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: andl %edx, %eax
+; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mx = and <1 x i16> %x, %mask
@@ -235,32 +229,28 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i16:
; CHECK-BASELINE: # %bb.0:
-; CHECK-BASELINE-NEXT: movl %r8d, %eax
+; CHECK-BASELINE-NEXT: movl %edi, %eax
+; CHECK-BASELINE-NEXT: xorl %edx, %eax
+; CHECK-BASELINE-NEXT: andl %r8d, %eax
+; CHECK-BASELINE-NEXT: xorl %edx, %eax
+; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: andl %r9d, %esi
-; CHECK-BASELINE-NEXT: andl %r8d, %edi
-; CHECK-BASELINE-NEXT: notl %eax
-; CHECK-BASELINE-NEXT: notl %r9d
-; CHECK-BASELINE-NEXT: andl %ecx, %r9d
-; CHECK-BASELINE-NEXT: orl %esi, %r9d
-; CHECK-BASELINE-NEXT: andl %edx, %eax
-; CHECK-BASELINE-NEXT: orl %edi, %eax
+; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-BASELINE-NEXT: movl %r9d, %edx
+; CHECK-BASELINE-NEXT: movl %esi, %edx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: out_v2i16:
; CHECK-SSE1: # %bb.0:
-; CHECK-SSE1-NEXT: movl %r8d, %eax
+; CHECK-SSE1-NEXT: movl %edi, %eax
+; CHECK-SSE1-NEXT: xorl %edx, %eax
+; CHECK-SSE1-NEXT: andl %r8d, %eax
+; CHECK-SSE1-NEXT: xorl %edx, %eax
+; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: andl %r9d, %esi
-; CHECK-SSE1-NEXT: andl %r8d, %edi
-; CHECK-SSE1-NEXT: notl %eax
-; CHECK-SSE1-NEXT: notl %r9d
-; CHECK-SSE1-NEXT: andl %ecx, %r9d
-; CHECK-SSE1-NEXT: orl %esi, %r9d
-; CHECK-SSE1-NEXT: andl %edx, %eax
-; CHECK-SSE1-NEXT: orl %edi, %eax
+; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-SSE1-NEXT: movl %r9d, %edx
+; CHECK-SSE1-NEXT: movl %esi, %edx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_v2i16:
More information about the llvm-commits
mailing list