[llvm] aefe8f2 - [DAG] Fold vXi1 multiplies -> and
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 26 03:46:26 PST 2021
Author: Simon Pilgrim
Date: 2021-02-26T11:46:12Z
New Revision: aefe8f2f6c9e61b7bee92924b381321b4b5053ec
URL: https://github.com/llvm/llvm-project/commit/aefe8f2f6c9e61b7bee92924b381321b4b5053ec
DIFF: https://github.com/llvm/llvm-project/commit/aefe8f2f6c9e61b7bee92924b381321b4b5053ec.diff
LOG: [DAG] Fold vXi1 multiplies -> and
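For i1 elements a multiply is equivalent to a bitwise AND (the product is 1 only when both operands are 1), so SelectionDAG::getNode now folds vXi1 MUL directly to AND.
This allows us to remove the X86 custom lowering of vXi1 MUL, which helps simplify a lot of the generated mask math.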
Mentioned in the post-review discussion on D97478.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512bw-mask-op.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1538d2ac240d..a571ee35b388 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5323,6 +5323,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
const APInt &MulImm = N1->getConstantOperandAPInt(0);
const APInt &N2CImm = N2C->getAPIntValue();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9aa76d988a20..b35d9bb5d498 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1468,7 +1468,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
- setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
@@ -1851,7 +1850,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
- setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
@@ -27261,9 +27259,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (VT.getScalarType() == MVT::i1)
- return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
-
// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index a6fe9396f821..c9f29a689086 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3844,49 +3844,17 @@ define i16 @test_v16i1_sub(i16 %x, i16 %y) {
}
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
-; KNL-LABEL: test_v16i1_mul:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v16i1_mul:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kandw %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v16i1_mul:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v16i1_mul:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v16i1_mul:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v16i1_mul:
; X86: ## %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kandw %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $ax killed $ax killed $eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
@@ -3936,49 +3904,17 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
}
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
-; KNL-LABEL: test_v8i1_mul:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v8i1_mul:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kandb %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v8i1_mul:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v8i1_mul:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kandb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v8i1_mul:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v8i1_mul:
; X86: ## %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kandb %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $al killed $al killed $eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@@ -5141,78 +5077,17 @@ define i1 @test_v1i1_sub(i1 %x, i1 %y) {
}
define i1 @test_v1i1_mul(i1 %x, i1 %y) {
-; KNL-LABEL: test_v1i1_mul:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: kshiftlw $15, %k0, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v1i1_mul:
-; SKX: ## %bb.0:
-; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; SKX-NEXT: kandw %k1, %k0, %k0
-; SKX-NEXT: kshiftlb $7, %k0, %k0
-; SKX-NEXT: kshiftrb $7, %k0, %k0
-; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v1i1_mul:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v1i1_mul:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: andl $1, %edi
-; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: andl $1, %esi
-; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v1i1_mul:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v1i1_mul:
; X86: ## %bb.0:
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kandw %k1, %k0, %k0
-; X86-NEXT: kshiftlb $7, %k0, %k0
-; X86-NEXT: kshiftrb $7, %k0, %k0
-; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: popl %ecx
+; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
%m1 = bitcast i1 %y to <1 x i1>
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 139a27e83154..92c126475fc5 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -178,10 +178,8 @@ define i32 @test_v32i1_sub(i32 %x, i32 %y) {
define i32 @test_v32i1_mul(i32 %x, i32 %y) {
; CHECK-LABEL: test_v32i1_mul:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: kandd %k1, %k0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = bitcast i32 %y to <32 x i1>
@@ -219,10 +217,8 @@ define i64 @test_v64i1_sub(i64 %x, i64 %y) {
define i64 @test_v64i1_mul(i64 %x, i64 %y) {
; CHECK-LABEL: test_v64i1_mul:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq %rdi, %k0
-; CHECK-NEXT: kmovq %rsi, %k1
-; CHECK-NEXT: kandq %k1, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: andq %rsi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = bitcast i64 %y to <64 x i1>
More information about the llvm-commits mailing list