[llvm] r258914 - AVX512: Add store mask patterns.
Igor Breger via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 27 00:43:26 PST 2016
Author: ibreger
Date: Wed Jan 27 02:43:25 2016
New Revision: 258914
URL: http://llvm.org/viewvc/llvm-project?rev=258914&view=rev
Log:
AVX512: Add store mask patterns.
Differential Revision: http://reviews.llvm.org/D16596
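
For context, a minimal sketch of the kind of IR these new patterns target (the function and value names below are illustrative only; the real coverage is in the updated avx512-mask-op.ll tests further down):

    define void @store_mask8(<8 x i1> %m, <8 x i1>* %p) {
      store <8 x i1> %m, <8 x i1>* %p, align 1
      ret void
    }

With DQI (e.g. SKX) such a mask store can now be selected to a single kmovb to memory; without DQI (e.g. KNL) the mask is copied to a GPR with kmovw and the low byte is stored with movb, as the added patterns below spell out.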
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258914&r1=258913&r2=258914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 27 02:43:25 2016
@@ -1412,9 +1412,6 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
if (Subtarget.hasDQI()) {
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
-
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
@@ -1708,6 +1705,8 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=258914&r1=258913&r2=258914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 27 02:43:25 2016
@@ -2059,9 +2059,6 @@ let Predicates = [HasBWI] in {
VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
-}
-
-let Predicates = [HasBWI] in {
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
@@ -2101,8 +2098,27 @@ let Predicates = [HasDQI] in {
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(store VK1:$src, addr:$dst),
+ (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
}
let Predicates = [HasAVX512, NoDQI] in {
+ def : Pat<(store VK1:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+ sub_8bit))>;
+ def : Pat<(store VK2:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
+ sub_8bit))>;
+ def : Pat<(store VK4:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
+ sub_8bit))>;
+ def : Pat<(store VK8:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+ sub_8bit))>;
+
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
@@ -2182,6 +2198,17 @@ def : Pat<(v32i1 (scalar_to_vector VK1:$
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK64)>;
+def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
+
+def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+
+def : Pat<(truncstorei1 GR8:$src, addr:$dst),
+ (MOV8mr addr:$dst, GR8:$src)>;
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512, NoDQI] in {
@@ -6562,28 +6589,6 @@ defm VSCATTERPF1QPD: avx512_gather_scatt
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
-def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
-
-def : Pat<(store VK1:$src, addr:$dst),
- (MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
- sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def : Pat<(store VK8:$src, addr:$dst),
- (MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
- sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
-}]>;
-
-def : Pat<(truncstorei1 GR8:$src, addr:$dst),
- (MOV8mr addr:$dst, GR8:$src)>;
-
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
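
Note that the i1 constant-store and truncstorei1 patterns added above are relocated from the gather/scatter prefetch area later in the file (removed in the hunk above) rather than newly introduced. As a reminder of what they cover: a boolean true is all-ones (-1) at the DAG level but is stored in memory as a byte with value 1, so for example (illustrative IR only):

    define void @store_true(i1* %p) {
      store i1 true, i1* %p, align 1
      ret void
    }

is expected to go through the (store (i1 -1)) pattern and become a single byte store of the constant 1 (MOV8mi).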
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=258914&r1=258913&r2=258914&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Jan 27 02:43:25 2016
@@ -1442,3 +1442,183 @@ define void @test23(<2 x i1> %a, <2 x i1
store <2 x i1> %a, <2 x i1>* %addr
ret void
}
+
+define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
+; KNL-LABEL: store_v1i1:
+; KNL: ## BB#0:
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kxnorw %k0, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: movb %al, (%rsi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v1i1:
+; SKX: ## BB#0:
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kxorw %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rsi)
+; SKX-NEXT: retq
+ %x = xor <1 x i1> %c, <i1 1>
+ store <1 x i1> %x, <1 x i1>* %ptr, align 4
+ ret void
+}
+
+define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
+; KNL-LABEL: store_v2i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v2i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k0
+; SKX-NEXT: knotw %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
+ %x = xor <2 x i1> %c, <i1 1, i1 1>
+ store <2 x i1> %x, <2 x i1>* %ptr, align 4
+ ret void
+}
+
+define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
+; KNL-LABEL: store_v4i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpextrd $3, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vpextrd $2, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vpextrd $1, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v4i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: knotw %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
+ %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
+ store <4 x i1> %x, <4 x i1>* %ptr, align 4
+ ret void
+}
+
+define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
+; KNL-LABEL: store_v8i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v8i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
+ %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+ store <8 x i1> %x, <8 x i1>* %ptr, align 4
+ ret void
+}
+
+define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
+; KNL-LABEL: store_v16i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v16i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k0
+; SKX-NEXT: knotw %k0, %k0
+; SKX-NEXT: kmovw %k0, (%rdi)
+; SKX-NEXT: retq
+ %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+ store <16 x i1> %x, <16 x i1>* %ptr, align 4
+ ret void
+}
+
+;void f2(int);
+;void f1(int c)
+;{
+; static int v = 0;
+; if (v == 0)
+; v = 1;
+; else
+; v = 0;
+; f2(v);
+;}
+
+ at f1.v = internal unnamed_addr global i1 false, align 4
+
+define void @f1(i32 %c) {
+; KNL-LABEL: f1:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: movzbl {{.*}}(%rip), %edi
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: movl %edi, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k0
+; KNL-NEXT: kxnorw %k0, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: movb %al, {{.*}}(%rip)
+; KNL-NEXT: xorl $1, %edi
+; KNL-NEXT: jmp _f2 ## TAILCALL
+;
+; SKX-LABEL: f1:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: movzbl {{.*}}(%rip), %edi
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: movl %edi, %eax
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kxorw %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, {{.*}}(%rip)
+; SKX-NEXT: xorl $1, %edi
+; SKX-NEXT: jmp _f2 ## TAILCALL
+entry:
+ %.b1 = load i1, i1* @f1.v, align 4
+ %not..b1 = xor i1 %.b1, true
+ store i1 %not..b1, i1* @f1.v, align 4
+ %0 = zext i1 %not..b1 to i32
+ tail call void @f2(i32 %0) #2
+ ret void
+}
+
+declare void @f2(i32) #1
+