[PATCH][AVX512] Add 512b masked integer shift by immediate patterns
Cameron McInally
cameron.mcinally at nyu.edu
Thu Nov 13 19:58:36 PST 2014
Hi guys,
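Here is a patch that adds masked patterns for the 512-bit integer shift-by-immediate instructions. It rewrites the avx512_shift_rmi multiclass on top of AVX512_maskable, adds an AVX512BIi8Base class to X86InstrFormats.td, and adds zero-masking (maskz) tests for the pslli, psrli and psrai intrinsics.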
Thanks,
Cam
-------------- next part --------------
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td (revision 221940)
+++ lib/Target/X86/X86InstrAVX512.td (working copy)
@@ -3132,27 +3132,17 @@
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, RegisterClass RC,
ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
- RegisterClass KRC> {
- def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
- SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
- def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
- def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
- def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+ X86VectorVTInfo _> {
+ defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (vt (OpNode _.RC:$src1, (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (vt (OpNode (mem_frag addr:$src1), (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3183,42 +3173,48 @@
}
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ VR512, v16i32, i512mem, memopv16i32,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ VR512, v8i64, i512mem, memopv8i64,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+ VR512, v16i32, i512mem, memopv16i32,
+ v16i32_info>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ VR512, v8i64, i512mem, memopv8i64,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ VR512, v16i32, i512mem, memopv16i32,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ VR512, v8i64, i512mem, memopv8i64,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
Index: lib/Target/X86/X86InstrFormats.td
===================================================================
--- lib/Target/X86/X86InstrFormats.td (revision 221894)
+++ lib/Target/X86/X86InstrFormats.td (working copy)
@@ -722,6 +722,10 @@
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
+class AVX512BIi8Base : PD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 221894)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -957,6 +957,13 @@
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
+
+define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
+ ; CHECK: vpslld $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 2)
+ ret <16 x i32> %res
+}
declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
@@ -966,6 +973,13 @@
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
+
+define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
+ ; CHECK: vpsllq $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 2)
+ ret <8 x i64> %res
+}
declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0) {
@@ -974,15 +988,28 @@
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
+
+define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
+ ; CHECK: vpsrld $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 2)
+ ret <16 x i32> %res
+}
declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
-
define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_q
; CHECK: vpsrlq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
+
+define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
+ ; CHECK: vpsrlq $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 2)
+ ret <8 x i64> %res
+}
declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0) {
@@ -991,13 +1018,25 @@
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
+define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
+ ; CHECK: vpsrad $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 2)
+ ret <16 x i32> %res
+}
declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
-
define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_q
; CHECK: vpsraq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
+
+define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
+ ; CHECK: vpsraq $7, %zmm{{[0-9]+}}, %zmm{{[0-9]+}} {%k{{[0-9]+}}} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 2)
+ ret <8 x i64> %res
+}
declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
More information about the llvm-commits mailing list