[llvm] r227043 - AVX-512: Changes in operations on masks registers for KNL and SKX
Elena Demikhovsky
elena.demikhovsky at intel.com
Sun Jan 25 04:47:16 PST 2015
Author: delena
Date: Sun Jan 25 06:47:15 2015
New Revision: 227043
URL: http://llvm.org/viewvc/llvm-project?rev=227043&view=rev
Log:
AVX-512: Changes in operations on masks registers for KNL and SKX
- Added KSHIFTB/D/Q for skx
- Added KORTESTB/D/Q for skx
- Fixed store operation for v8i1 type for KNL
- Store size of v8i1, v4i1 and v2i1 are changed to 8 bits
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86RegisterInfo.td
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=227043&r1=227042&r2=227043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 25 06:47:15 2015
@@ -12871,6 +12871,8 @@ X86TargetLowering::ExtractBitFromMaskVec
MVT EltVT = Op.getSimpleValueType();
assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
+ assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) &&
+ "Unexpected vector type in ExtractBitFromMaskVector");
// variable index can't be handled in mask registers,
// extend vector to VR512
@@ -12884,6 +12886,8 @@ X86TargetLowering::ExtractBitFromMaskVec
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
const TargetRegisterClass* rc = getRegClassFor(VecVT);
+ if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
+ rc = getRegClassFor(MVT::v16i1);
unsigned MaxSift = rc->getSize()*8 - 1;
Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
DAG.getConstant(MaxSift - IdxVal, MVT::i8));
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=227043&r1=227042&r2=227043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Jan 25 06:47:15 2015
@@ -1624,7 +1624,8 @@ multiclass avx512_mask_mov<bits<8> opc_k
[(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
let mayStore = 1 in
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store KRC:$src, addr:$dst)]>;
}
}
@@ -1951,13 +1952,26 @@ multiclass avx512_mask_testop<bits<8> op
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
VEX, PS;
+ let Predicates = [HasDQI] in
+ defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
+ VEX, PD;
+ let Predicates = [HasBWI] in {
+ defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
+ VEX, PS, VEX_W;
+ defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
+ VEX, PD, VEX_W;
+ }
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
def : Pat<(X86cmp VK1:$src1, (i1 0)),
(KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src1, VK16))>;
+ (COPY_TO_REGCLASS VK1:$src1, VK16))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(X86cmp VK1:$src1, (i1 0)),
+ (KORTESTBrr (COPY_TO_REGCLASS VK1:$src1, VK8),
+ (COPY_TO_REGCLASS VK1:$src1, VK8))>, Requires<[HasDQI]>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -1972,7 +1986,17 @@ multiclass avx512_mask_shiftop<bits<8> o
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, TAPD, VEX_W;
+ VEX, TAPD, VEX_W;
+ let Predicates = [HasDQI] in
+ defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
+ VEX, TAPD;
+ let Predicates = [HasBWI] in {
+ defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
+ VEX, TAPD, VEX_W;
+ let Predicates = [HasDQI] in
+ defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
+ VEX, TAPD;
+ }
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
@@ -2023,10 +2047,14 @@ let Predicates = [HasVLX] in {
}
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
- (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
+ (v8i1 (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
+ (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
- (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
+ (v8i1 (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
+ (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
@@ -5204,7 +5232,14 @@ def : Pat<(store (i1 1), addr:$dst), (M
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
def : Pat<(store VK1:$src, addr:$dst),
- (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+ sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(store VK8:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+ sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr), [{
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=227043&r1=227042&r2=227043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Sun Jan 25 06:47:15 2015
@@ -469,18 +469,18 @@ def VR256X : RegisterClass<"X86", [v32i8
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
-def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
-def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
-def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
-def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
+def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
+def VK2 : RegisterClass<"X86", [v2i1], 8, (add VK1)> {let Size = 8;}
+def VK4 : RegisterClass<"X86", [v4i1], 8, (add VK2)> {let Size = 8;}
+def VK8 : RegisterClass<"X86", [v8i1], 8, (add VK4)> {let Size = 8;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
-def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
-def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
-def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
-def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
+def VK1WM : RegisterClass<"X86", [i1], 8, (sub VK1, K0)> {let Size = 8;}
+def VK2WM : RegisterClass<"X86", [v2i1], 8, (sub VK2, K0)> {let Size = 8;}
+def VK4WM : RegisterClass<"X86", [v4i1], 8, (sub VK4, K0)> {let Size = 8;}
+def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=227043&r1=227042&r2=227043&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sun Jan 25 06:47:15 2015
@@ -106,7 +106,8 @@ define i32 @test10(<16 x i32> %x, i32 %i
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
-;CHECK: kortestw
+;KNL: kortestw
+;SKX: kortestb
;CHECK: je
;CHECK: ret
;CHECK: ret
@@ -125,7 +126,8 @@ define <16 x i32> @test11(<16 x i32>%a,
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
-;CHECK: kortestw
+;KNL: kortestw
+;SKX: kortestb
;CHECK: ret
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
@@ -150,9 +152,12 @@ define i16 @test13(i32 %a, i32 %b) {
;CHECK-LABEL: test14
;CHECK: vpcmpgtq
-;CHECK: kshiftlw $11
-;CHECK: kshiftrw $15
-;CHECK: kortestw
+;KNL: kshiftlw $11
+;KNL: kshiftrw $15
+;KNL: kortestw
+;SKX: kshiftlb $3
+;SKX: kshiftrb $7
+;SKX: kortestb
;CHECK: ret
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
@@ -188,9 +193,11 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
-;CHECK: kshiftlw
-;CHECK: kshiftrw
+;KNL: kshiftlw
+;KNL: kshiftrw
;KNL: korw
+;SKX: kshiftlb
+;SKX: kshiftrb
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=227043&r1=227042&r2=227043&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sun Jan 25 06:47:15 2015
@@ -1,28 +1,37 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK
+; CHECK-LABEL: mask16
+; CHECK: kmovw
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw
define i16 @mask16(i16 %x) {
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <16 x i1> %m1 to i16
ret i16 %ret
-; CHECK-LABEL: mask16
-; CHECK: kmovw
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
-; CHECK: ret
}
+; CHECK-LABEL: mask8
+; KNL: kmovw
+; KNL-NEXT: knotw
+; KNL-NEXT: kmovw
+; SKX: kmovb
+; SKX-NEXT: knotb
+; SKX-NEXT: kmovb
+
define i8 @mask8(i8 %x) {
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
ret i8 %ret
-; CHECK-LABEL: mask8
-; CHECK: kmovw
+}
+
+; CHECK-LABEL: mask16_mem
+; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
+; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
-}
define void @mask16_mem(i16* %ptr) {
%x = load i16* %ptr, align 4
@@ -31,13 +40,16 @@ define void @mask16_mem(i16* %ptr) {
%ret = bitcast <16 x i1> %m1 to i16
store i16 %ret, i16* %ptr, align 4
ret void
-; CHECK-LABEL: mask16_mem
-; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
}
+; CHECK-LABEL: mask8_mem
+; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
+; KNL-NEXT: knotw
+; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
+; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
+; SKX-NEXT: knotb
+; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
+
define void @mask8_mem(i8* %ptr) {
%x = load i8* %ptr, align 4
%m0 = bitcast i8 %x to <8 x i1>
@@ -45,13 +57,12 @@ define void @mask8_mem(i8* %ptr) {
%ret = bitcast <8 x i1> %m1 to i8
store i8 %ret, i8* %ptr, align 4
ret void
-; CHECK-LABEL: mask8_mem
-; CHECK: kmovw ([[ARG1]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
}
+; CHECK-LABEL: mand16
+; CHECK: kandw
+; CHECK: kxorw
+; CHECK: korw
define i16 @mand16(i16 %x, i16 %y) {
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
@@ -59,15 +70,11 @@ define i16 @mand16(i16 %x, i16 %y) {
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
%ret = bitcast <16 x i1> %me to i16
-; CHECK: kandw
-; CHECK: kxorw
-; CHECK: korw
ret i16 %ret
}
-; CHECK: shuf_test1
+; CHECK-LABEL: shuf_test1
; CHECK: kshiftrw $8
-; CHECK:ret
define i8 @shuf_test1(i16 %v) nounwind {
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -75,11 +82,11 @@ define i8 @shuf_test1(i16 %v) nounwind {
ret i8 %mask1
}
-; CHECK: zext_test1
+; CHECK-LABEL: zext_test1
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -87,11 +94,11 @@ define i32 @zext_test1(<16 x i32> %a, <1
ret i32 %res
}
-; CHECK: zext_test2
+; CHECK-LABEL: zext_test2
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -99,14 +106,29 @@ define i16 @zext_test2(<16 x i32> %a, <1
ret i16 %res
}
-; CHECK: zext_test3
+; CHECK-LABEL: zext_test3
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i8
ret i8 %res
}
+
+; CHECK-LABEL: conv1
+; KNL: kmovw %k0, %eax
+; KNL: movb %al, (%rdi)
+; SKX: kmovb %k0, (%rdi)
+define i8 @conv1(<8 x i1>* %R) {
+entry:
+ store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
+
+ %maskPtr = alloca <8 x i1>
+ store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
+ %mask = load <8 x i1>* %maskPtr
+ %mask_convert = bitcast <8 x i1> %mask to i8
+ ret i8 %mask_convert
+}
\ No newline at end of file
More information about the llvm-commits
mailing list