[llvm] 87dbcd8 - [CodeGen] Canonicalise adds/subs of i1 vectors using XOR
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 25 02:31:36 PST 2021
Author: David Sherwood
Date: 2021-02-25T10:31:26Z
New Revision: 87dbcd88651a4e72fc8f5e1594f9b02232277301
URL: https://github.com/llvm/llvm-project/commit/87dbcd88651a4e72fc8f5e1594f9b02232277301
DIFF: https://github.com/llvm/llvm-project/commit/87dbcd88651a4e72fc8f5e1594f9b02232277301.diff
LOG: [CodeGen] Canonicalise adds/subs of i1 vectors using XOR
When SelectionDAG::getNode() is called to create an ADD or SUB
of two vectors with i1 element types, we can canonicalise the node
to use XOR instead: i1 arithmetic is modulo 2, so 1+1 wraps around
to 0 and 0-1 wraps to 1, which is exactly what XOR computes.
I've added the following tests for SVE targets:
CodeGen/AArch64/sve-pred-arith.ll
and modified some X86 tests to reflect the much simpler codegen
required.
Differential Revision: https://reviews.llvm.org/D97276
Added:
llvm/test/CodeGen/AArch64/sve-pred-arith.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512bw-mask-op.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a871ce76b59f..3fd3e61c49ce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5315,6 +5315,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// it's worth handling here.
if (N2C && N2C->isNullValue())
return N1;
+ if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::XOR, DL, VT, N1, N2);
break;
case ISD::MUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll
new file mode 100644
index 000000000000..881f590b797f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; LEGAL ADDS
+
+define <vscale x 16 x i1> @add_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: add_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = add <vscale x 16 x i1> %a, %b
+ ret <vscale x 16 x i1> %res;
+}
+
+define <vscale x 8 x i1> @add_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: add_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.h
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = add <vscale x 8 x i1> %a, %b
+ ret <vscale x 8 x i1> %res;
+}
+
+define <vscale x 4 x i1> @add_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: add_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.s
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = add <vscale x 4 x i1> %a, %b
+ ret <vscale x 4 x i1> %res;
+}
+
+define <vscale x 2 x i1> @add_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: add_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = add <vscale x 2 x i1> %a, %b
+ ret <vscale x 2 x i1> %res;
+}
+
+
+; ILLEGAL ADDS
+
+define aarch64_sve_vector_pcs <vscale x 64 x i1> @add_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
+; CHECK-LABEL: add_nxv64i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ldr p4, [x3]
+; CHECK-NEXT: ldr p5, [x0]
+; CHECK-NEXT: ldr p6, [x1]
+; CHECK-NEXT: ldr p7, [x2]
+; CHECK-NEXT: ptrue p8.b
+; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
+; CHECK-NEXT: eor p1.b, p8/z, p1.b, p6.b
+; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
+; CHECK-NEXT: eor p3.b, p8/z, p3.b, p4.b
+; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = add <vscale x 64 x i1> %a, %b
+ ret <vscale x 64 x i1> %res;
+}
+
+
+; LEGAL SUBS
+
+define <vscale x 16 x i1> @sub_xv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: sub_xv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = sub <vscale x 16 x i1> %a, %b
+ ret <vscale x 16 x i1> %res;
+}
+
+define <vscale x 8 x i1> @sub_xv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: sub_xv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.h
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = sub <vscale x 8 x i1> %a, %b
+ ret <vscale x 8 x i1> %res;
+}
+
+define <vscale x 4 x i1> @sub_xv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: sub_xv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.s
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = sub <vscale x 4 x i1> %a, %b
+ ret <vscale x 4 x i1> %res;
+}
+
+define <vscale x 2 x i1> @sub_xv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: sub_xv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+ %res = sub <vscale x 2 x i1> %a, %b
+ ret <vscale x 2 x i1> %res;
+}
+
+
+; ILLEGAL SUBS
+
+
+define aarch64_sve_vector_pcs <vscale x 64 x i1> @sub_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
+; CHECK-LABEL: sub_nxv64i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ldr p4, [x3]
+; CHECK-NEXT: ldr p5, [x0]
+; CHECK-NEXT: ldr p6, [x1]
+; CHECK-NEXT: ldr p7, [x2]
+; CHECK-NEXT: ptrue p8.b
+; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
+; CHECK-NEXT: eor p1.b, p8/z, p1.b, p6.b
+; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
+; CHECK-NEXT: eor p3.b, p8/z, p3.b, p4.b
+; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = sub <vscale x 64 x i1> %a, %b
+ ret <vscale x 64 x i1> %res;
+}
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 9b81809962aa..a6fe9396f821 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3804,49 +3804,17 @@ define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
}
define i16 @test_v16i1_add(i16 %x, i16 %y) {
-; KNL-LABEL: test_v16i1_add:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v16i1_add:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kxorw %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v16i1_add:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v16i1_add:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v16i1_add:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v16i1_add:
; X86: ## %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorw %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $ax killed $ax killed $eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
@@ -3856,49 +3824,17 @@ define i16 @test_v16i1_add(i16 %x, i16 %y) {
}
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
-; KNL-LABEL: test_v16i1_sub:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v16i1_sub:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kxorw %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v16i1_sub:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v16i1_sub:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v16i1_sub:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v16i1_sub:
; X86: ## %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorw %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $ax killed $ax killed $eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
@@ -3960,49 +3896,17 @@ define i16 @test_v16i1_mul(i16 %x, i16 %y) {
}
define i8 @test_v8i1_add(i8 %x, i8 %y) {
-; KNL-LABEL: test_v8i1_add:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v8i1_add:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kxorb %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v8i1_add:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v8i1_add:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v8i1_add:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v8i1_add:
; X86: ## %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorb %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $al killed $al killed $eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@@ -4012,49 +3916,17 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
}
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
-; KNL-LABEL: test_v8i1_sub:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v8i1_sub:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: kxorb %k1, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v8i1_sub:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v8i1_sub:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v8i1_sub:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v8i1_sub:
; X86: ## %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorb %k1, %k0, %k0
-; X86-NEXT: kmovd %k0, %eax
-; X86-NEXT: ## kill: def $al killed $al killed $eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@@ -5229,78 +5101,17 @@ define <64 x i1> @mask64_insert(i32 %a) {
}
define i1 @test_v1i1_add(i1 %x, i1 %y) {
-; KNL-LABEL: test_v1i1_add:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kshiftlw $15, %k0, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v1i1_add:
-; SKX: ## %bb.0:
-; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; SKX-NEXT: kxorw %k1, %k0, %k0
-; SKX-NEXT: kshiftlb $7, %k0, %k0
-; SKX-NEXT: kshiftrb $7, %k0, %k0
-; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v1i1_add:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v1i1_add:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: andl $1, %edi
-; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: andl $1, %esi
-; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v1i1_add:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v1i1_add:
; X86: ## %bb.0:
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorw %k1, %k0, %k0
-; X86-NEXT: kshiftlb $7, %k0, %k0
-; X86-NEXT: kshiftrb $7, %k0, %k0
-; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: popl %ecx
+; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
%m1 = bitcast i1 %y to <1 x i1>
@@ -5310,78 +5121,17 @@ define i1 @test_v1i1_add(i1 %x, i1 %y) {
}
define i1 @test_v1i1_sub(i1 %x, i1 %y) {
-; KNL-LABEL: test_v1i1_sub:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: kshiftlw $15, %k0, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_v1i1_sub:
-; SKX: ## %bb.0:
-; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; SKX-NEXT: kxorw %k1, %k0, %k0
-; SKX-NEXT: kshiftlb $7, %k0, %k0
-; SKX-NEXT: kshiftrb $7, %k0, %k0
-; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: test_v1i1_sub:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: test_v1i1_sub:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: andl $1, %edi
-; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: andl $1, %esi
-; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
-; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
-; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: test_v1i1_sub:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; X86-LABEL: test_v1i1_sub:
; X86: ## %bb.0:
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT: kxorw %k1, %k0, %k0
-; X86-NEXT: kshiftlb $7, %k0, %k0
-; X86-NEXT: kshiftrb $7, %k0, %k0
-; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: popl %ecx
+; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
%m1 = bitcast i1 %y to <1 x i1>
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index dcdbb16293bd..139a27e83154 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -152,10 +152,8 @@ define i64 @mand64_mem(<64 x i1>* %x, <64 x i1>* %y) {
define i32 @test_v32i1_add(i32 %x, i32 %y) {
; CHECK-LABEL: test_v32i1_add:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: kxord %k1, %k0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = bitcast i32 %y to <32 x i1>
@@ -167,10 +165,8 @@ define i32 @test_v32i1_add(i32 %x, i32 %y) {
define i32 @test_v32i1_sub(i32 %x, i32 %y) {
; CHECK-LABEL: test_v32i1_sub:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: kxord %k1, %k0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = bitcast i32 %y to <32 x i1>
@@ -197,10 +193,8 @@ define i32 @test_v32i1_mul(i32 %x, i32 %y) {
define i64 @test_v64i1_add(i64 %x, i64 %y) {
; CHECK-LABEL: test_v64i1_add:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq %rdi, %k0
-; CHECK-NEXT: kmovq %rsi, %k1
-; CHECK-NEXT: kxorq %k1, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = bitcast i64 %y to <64 x i1>
@@ -212,10 +206,8 @@ define i64 @test_v64i1_add(i64 %x, i64 %y) {
define i64 @test_v64i1_sub(i64 %x, i64 %y) {
; CHECK-LABEL: test_v64i1_sub:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq %rdi, %k0
-; CHECK-NEXT: kmovq %rsi, %k1
-; CHECK-NEXT: kxorq %k1, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = bitcast i64 %y to <64 x i1>
More information about the llvm-commits
mailing list