[llvm] r331472 - [X86] Split WriteVecShift/WriteVarVecShift into MMX, XMM and YMM/ZMM scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu May 3 10:56:44 PDT 2018
Author: rksimon
Date: Thu May 3 10:56:43 2018
New Revision: 331472
URL: http://llvm.org/viewvc/llvm-project?rev=331472&view=rev
Log:
[X86] Split WriteVecShift/WriteVarVecShift into MMX, XMM and YMM/ZMM scheduler classes
This took a bit of extra work because, on Intel targets, the old (V)PSLLDrr/(V)PSLLDrm style instructions act differently from the shift-by-immediate forms. I ended up creating WriteVecShiftImm classes for XMM/YMM/ZMM vector shift by immediate, and retaining WriteVecShift as the default (used only by MMX) plus WriteVecShiftX/WriteVecShiftY. X86SchedWriteWidths hides most of this, thank goodness.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May 3 10:56:43 2018
@@ -5587,24 +5587,24 @@ multiclass avx512_shift_rmi_dq<bits<8> o
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
- SchedWriteVecShift>,
+ SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
- SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
- SchedWriteVecShift>,
+ SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
- SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
- SchedWriteVecShift>,
+ SchedWriteVecShiftImm>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
- SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
- SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
- SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V;
+ SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
SchedWriteVecShift>;
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Thu May 3 10:56:43 2018
@@ -52,7 +52,8 @@ let Constraints = "$src1 = $dst" in {
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
- Intrinsic IntId2, X86FoldableSchedWrite sched> {
+ Intrinsic IntId2, X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite schedImm> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -68,7 +69,7 @@ let Constraints = "$src1 = $dst" in {
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
- Sched<[sched]>;
+ Sched<[schedImm]>;
}
}
@@ -412,30 +413,38 @@ defm MMX_PANDN : MMXI_binop_rm_int<0xDF,
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
- SchedWriteVecShift.MMX>;
+ SchedWriteVecShift.MMX,
+ SchedWriteVecShiftImm.MMX>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu May 3 10:56:43 2018
@@ -3396,6 +3396,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bi
string OpcodeStr, SDNode OpNode,
SDNode OpNode2, RegisterClass RC,
X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite schedImm,
ValueType DstVT, ValueType SrcVT,
PatFrag ld_frag, bit Is2Addr = 1> {
// src2 is always 128-bit
@@ -3420,25 +3421,28 @@ multiclass PDI_binop_rmi<bits<8> opc, bi
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
- Sched<[sched]>;
+ Sched<[schedImm]>;
}
multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, SDNode OpNode,
SDNode OpNode2, ValueType DstVT128,
ValueType DstVT256, ValueType SrcVT,
- X86SchedWriteWidths sched, Predicate prd> {
+ X86SchedWriteWidths sched,
+ X86SchedWriteWidths schedImm, Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
- OpNode, OpNode2, VR128, sched.XMM, DstVT128,
- SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
+ DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
- OpNode, OpNode2, VR256, sched.YMM, DstVT256,
- SrcVT, loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG;
+ OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
+ DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L,
+ VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
- VR128, sched.XMM, DstVT128, SrcVT, memopv2i64>;
+ VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
+ memopv2i64>;
}
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
@@ -3469,25 +3473,30 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
v8i16, v16i16, v8i16, SchedWriteVecShift,
- NoVLX_Or_NoBWI>;
+ SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
- v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>;
+ v4i32, v8i32, v4i32, SchedWriteVecShift,
+ SchedWriteVecShiftImm, NoVLX>;
defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
- v2i64, v4i64, v2i64, SchedWriteVecShift, NoVLX>;
+ v2i64, v4i64, v2i64, SchedWriteVecShift,
+ SchedWriteVecShiftImm, NoVLX>;
defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
v8i16, v16i16, v8i16, SchedWriteVecShift,
- NoVLX_Or_NoBWI>;
+ SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
- v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>;
+ v4i32, v8i32, v4i32, SchedWriteVecShift,
+ SchedWriteVecShiftImm, NoVLX>;
defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
- v2i64, v4i64, v2i64, SchedWriteVecShift, NoVLX>;
+ v2i64, v4i64, v2i64, SchedWriteVecShift,
+ SchedWriteVecShiftImm, NoVLX>;
defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
v8i16, v16i16, v8i16, SchedWriteVecShift,
- NoVLX_Or_NoBWI>;
+ SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
- v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>;
+ v4i32, v8i32, v4i32, SchedWriteVecShift,
+ SchedWriteVecShiftImm, NoVLX>;
defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
SchedWriteShuffle>;
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Thu May 3 10:56:43 2018
@@ -155,10 +155,14 @@ multiclass xop3opimm<bits<8> opc, string
}
let ExeDomain = SSEPackedInt in {
- defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8, SchedWriteVecShift.XMM>;
- defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32, SchedWriteVecShift.XMM>;
- defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64, SchedWriteVecShift.XMM>;
- defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16, SchedWriteVecShift.XMM>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
+ SchedWriteVecShiftImm.XMM>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
+ SchedWriteVecShiftImm.XMM>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
+ SchedWriteVecShiftImm.XMM>;
+ defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
+ SchedWriteVecShiftImm.XMM>;
}
// Instruction where second source can be memory, but third must be register
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Thu May 3 10:56:43 2018
@@ -203,7 +203,6 @@ defm : BWWriteResPair<WriteVecALU, [BW
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
-defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
@@ -222,6 +221,17 @@ defm : BWWriteResPair<WritePSADBW, [BW
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
+// Vector integer shifts.
+defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>;
+defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
+defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>;
+
+defm : BWWriteResPair<WriteVecShiftImmX, [BWPort0], 1, [1], 1, 5>; // Vector integer immediate shifts (XMM).
+defm : BWWriteResPair<WriteVecShiftImmY, [BWPort0], 1, [1], 1, 6>; // Vector integer immediate shifts (YMM/ZMM).
+defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 3, [2,1], 3, 5>; // Variable vector shifts.
+defm : BWWriteResPair<WriteVarVecShiftY, [BWPort0, BWPort5], 3, [2,1], 3, 6>; // Variable vector shifts (YMM/ZMM).
+
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [BWPort5]> {
let Latency = 2;
@@ -347,7 +357,6 @@ defm : BWWriteResPair<WriteFShuffle256,
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
-defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
@@ -545,14 +554,6 @@ def BWWriteResGroup15 : SchedWriteRes<[B
def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
- "(V?)PSLLDrr",
- "(V?)PSLLQrr",
- "(V?)PSLLWrr",
- "(V?)PSRADrr",
- "(V?)PSRAWrr",
- "(V?)PSRLDrr",
- "(V?)PSRLQrr",
- "(V?)PSRLWrr",
"(V?)PTESTrr")>;
def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
@@ -676,15 +677,6 @@ def: InstRW<[BWWriteResGroup30], (instrs
XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
XCHG16ar, XCHG32ar, XCHG64ar)>;
-def BWWriteResGroup31 : SchedWriteRes<[BWPort0,BWPort5]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup31], (instregex "VPSLLVD(Y?)rr",
- "VPSRAVD(Y?)rr",
- "VPSRLVD(Y?)rr")>;
-
def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -757,14 +749,6 @@ def BWWriteResGroup40 : SchedWriteRes<[B
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr",
- "VPSLLDYrr",
- "VPSLLQYrr",
- "VPSLLWYrr",
- "VPSRADYrr",
- "VPSRAWYrr",
- "VPSRLDYrr",
- "VPSRLQYrr",
- "VPSRLWYrr",
"VPTESTYrr")>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
@@ -1066,16 +1050,8 @@ def BWWriteResGroup73 : SchedWriteRes<[B
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup73], (instregex "VPSLLDYrm",
- "VPSLLQYrm",
- "VPSLLVQYrm",
- "VPSLLWYrm",
- "VPSRADYrm",
- "VPSRAWYrm",
- "VPSRLDYrm",
- "VPSRLQYrm",
+def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm",
"VPSRLVQYrm",
- "VPSRLWYrm",
"VTESTPDYrm",
"VTESTPSYrm")>;
@@ -1122,15 +1098,7 @@ def BWWriteResGroup81 : SchedWriteRes<[B
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup81], (instregex "(V?)PSLLDrm",
- "(V?)PSLLQrm",
- "(V?)PSLLWrm",
- "(V?)PSRADrm",
- "(V?)PSRAWrm",
- "(V?)PSRLDrm",
- "(V?)PSRLQrm",
- "(V?)PSRLWrm",
- "(V?)PTESTrm")>;
+def: InstRW<[BWWriteResGroup81], (instregex "(V?)PTESTrm")>;
def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> {
let Latency = 7;
@@ -1233,15 +1201,6 @@ def: InstRW<[BWWriteResGroup94], (instre
"VPMASKMOVDYrm",
"VPMASKMOVQYrm")>;
-def BWWriteResGroup95 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[BWWriteResGroup95], (instregex "VPSLLVDrm",
- "VPSRAVDrm",
- "VPSRLVDrm")>;
-
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
@@ -1359,15 +1318,6 @@ def BWWriteResGroup108 : SchedWriteRes<[
def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
"VPBROADCASTW(Y?)rm")>;
-def BWWriteResGroup109 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[BWWriteResGroup109], (instregex "VPSLLVDYrm",
- "VPSRAVDYrm",
- "VPSRLVDYrm")>;
-
def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Thu May 3 10:56:43 2018
@@ -194,7 +194,6 @@ def : WriteRes<WriteVecStore, [HW
def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteVecMove, [HWPort015]>;
-defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>;
@@ -213,13 +212,23 @@ defm : HWWriteResPair<WriteShuffle256, [
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
-defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
+// Vector integer shifts.
+defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
+defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>;
+
+defm : HWWriteResPair<WriteVecShiftImmX, [HWPort0], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecShiftImmY, [HWPort0], 1, [1], 1, 7>;
+defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WriteVarVecShiftY, [HWPort0, HWPort5], 3, [2,1], 3, 7>;
+
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [HWPort5]> {
let Latency = 2;
@@ -834,16 +843,8 @@ def HWWriteResGroup11_2 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLDYrm",
- "VPSLLQYrm",
- "VPSLLVQYrm",
- "VPSLLWYrm",
- "VPSRADYrm",
- "VPSRAWYrm",
- "VPSRLDYrm",
- "VPSRLQYrm",
+def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm",
"VPSRLVQYrm",
- "VPSRLWYrm",
"VTESTPDYrm",
"VTESTPSYrm")>;
@@ -943,12 +944,12 @@ def: InstRW<[HWWriteResGroup16], (instre
"BLSMSK(32|64)rm",
"BLSR(32|64)rm",
"MOVBE(16|32|64)rm",
- "MMX_PABS(B|D|W)rm",
- "MMX_P(ADD|SUB)(B|D|W|Q)irm",
- "MMX_P(ADD|SUB)(U?)S(B|W)irm",
- "MMX_PAVG(B|W)irm",
- "MMX_PCMP(EQ|GT)(B|D|W)irm",
- "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PABS(B|D|W)rm",
+ "MMX_P(ADD|SUB)(B|D|W|Q)irm",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irm",
+ "MMX_PAVG(B|W)irm",
+ "MMX_PCMP(EQ|GT)(B|D|W)irm",
+ "MMX_P(MAX|MIN)(SW|UB)irm",
"MMX_PSIGN(B|D|W)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
@@ -1082,14 +1083,6 @@ def: InstRW<[HWWriteResGroup31], (instre
"VCVTPH2PSrr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
- "(V?)PSLLDrr",
- "(V?)PSLLQrr",
- "(V?)PSLLWrr",
- "(V?)PSRADrr",
- "(V?)PSRAWrr",
- "(V?)PSRLDrr",
- "(V?)PSRLQrr",
- "(V?)PSRLWrr",
"(V?)PTESTrr")>;
def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
@@ -1176,15 +1169,7 @@ def HWWriteResGroup38 : SchedWriteRes<[H
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup38], (instregex "(V?)PSLLDrm",
- "(V?)PSLLQrm",
- "(V?)PSLLWrm",
- "(V?)PSRADrm",
- "(V?)PSRAWrm",
- "(V?)PSRLDrm",
- "(V?)PSRLQrm",
- "(V?)PSRLWrm",
- "(V?)PTESTrm")>;
+def: InstRW<[HWWriteResGroup38], (instregex "(V?)PTESTrm")>;
def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> {
let Latency = 7;
@@ -1338,15 +1323,6 @@ def: InstRW<[HWWriteResGroup54], (instrs
XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
XCHG16ar, XCHG32ar, XCHG64ar)>;
-def HWWriteResGroup55 : SchedWriteRes<[HWPort0,HWPort5]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup55], (instregex "VPSLLVD(Y?)rr",
- "VPSRAVD(Y?)rr",
- "VPSRLVD(Y?)rr")>;
-
def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -1400,24 +1376,6 @@ def HWWriteResGroup62 : SchedWriteRes<[H
def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m")>;
-def HWWriteResGroup63 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup63], (instregex "VPSLLVDYrm",
- "VPSRAVDYrm",
- "VPSRLVDYrm")>;
-
-def HWWriteResGroup63_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup63_1], (instregex "VPSLLVDrm",
- "VPSRAVDrm",
- "VPSRLVDrm")>;
-
def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
let Latency = 8;
let NumMicroOps = 4;
@@ -1491,14 +1449,6 @@ def HWWriteResGroup71 : SchedWriteRes<[H
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup71], (instregex "VCVTPS2PDYrr",
- "VPSLLDYrr",
- "VPSLLQYrr",
- "VPSLLWYrr",
- "VPSRADYrr",
- "VPSRAWYrr",
- "VPSRLDYrr",
- "VPSRLQYrr",
- "VPSRLWYrr",
"VPTESTYrr")>;
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Thu May 3 10:56:43 2018
@@ -174,7 +174,6 @@ def : WriteRes<WriteVecStore, [SB
def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteVecMove, [SBPort05]>;
-defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>;
@@ -197,6 +196,15 @@ defm : SBWriteResPair<WritePSADBW, [SBP
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
+// Vector integer shifts.
+defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>;
+defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>;
+defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
+
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
let Latency = 2;
@@ -336,7 +344,6 @@ defm : SBWriteResPair<WriteFShuffle256,
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAS, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
@@ -349,14 +356,6 @@ def SBWriteResGroup0 : SchedWriteRes<[SB
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup0], (instregex "(V?)CVTSS2SDrr",
- "(V?)PSLLDri",
- "(V?)PSLLQri",
- "(V?)PSLLWri",
- "(V?)PSRADri",
- "(V?)PSRAWri",
- "(V?)PSRLDri",
- "(V?)PSRLQri",
- "(V?)PSRLWri",
"VTESTPD(Y?)rr",
"VTESTPS(Y?)rr")>;
@@ -496,20 +495,6 @@ def SBWriteResGroup13 : SchedWriteRes<[S
def: InstRW<[SBWriteResGroup13], (instregex "(V?)CVTPS2PD(Y?)rr",
"(V?)PTEST(Y?)rr")>;
-def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup14], (instregex "(V?)PSLLDrr",
- "(V?)PSLLQrr",
- "(V?)PSLLWrr",
- "(V?)PSRADrr",
- "(V?)PSRAWrr",
- "(V?)PSRLDrr",
- "(V?)PSRLQrr",
- "(V?)PSRLWrr")>;
-
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1073,20 +1058,6 @@ def SBWriteResGroup78 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup78], (instregex "(V?)PTESTrm")>;
-def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup79], (instregex "(V?)PSLLDrm",
- "(V?)PSLLQrm",
- "(V?)PSLLWrm",
- "(V?)PSRADrm",
- "(V?)PSRAWrm",
- "(V?)PSRLDrm",
- "(V?)PSRLQrm",
- "(V?)PSRLWrm")>;
-
def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 8;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Thu May 3 10:56:43 2018
@@ -199,7 +199,6 @@ defm : SKLWriteResPair<WriteVecALU, [S
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
-defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
@@ -218,6 +217,17 @@ defm : SKLWriteResPair<WritePSADBW, [SK
defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW.
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
+// Vector integer shifts.
+defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1, [1], 1, 5>;
+defm : SKLWriteResPair<WriteVecShiftX, [SKLPort5,SKLPort01], 2, [1,1], 2, 6>;
+defm : X86WriteRes<WriteVecShiftY, [SKLPort5,SKLPort01], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [SKLPort01,SKLPort23], 8, [1,1], 2>;
+
+defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
+defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
+defm : SKLWriteResPair<WriteVarVecShift, [SKLPort01], 1, [1], 1, 6>; // Variable vector shifts.
+defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
+
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKLPort5]> {
let Latency = 2;
@@ -353,7 +363,6 @@ defm : SKLWriteResPair<WriteFShuffle256,
defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
-defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
def : WriteRes<WriteMicrocoded, [SKLPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
@@ -426,25 +435,6 @@ def SKLWriteResGroup4 : SchedWriteRes<[S
}
def: InstRW<[SKLWriteResGroup4], (instregex "JMP(16|32|64)r")>;
-def SKLWriteResGroup5 : SchedWriteRes<[SKLPort01]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PSLLD(Y?)ri",
- "(V?)PSLLQ(Y?)ri",
- "VPSLLVD(Y?)rr",
- "VPSLLVQ(Y?)rr",
- "(V?)PSLLW(Y?)ri",
- "(V?)PSRAD(Y?)ri",
- "VPSRAVD(Y?)rr",
- "(V?)PSRAW(Y?)ri",
- "(V?)PSRLD(Y?)ri",
- "(V?)PSRLQ(Y?)ri",
- "VPSRLVD(Y?)rr",
- "VPSRLVQ(Y?)rr",
- "(V?)PSRLW(Y?)ri")>;
-
def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> {
let Latency = 1;
let NumMicroOps = 1;
@@ -619,20 +609,6 @@ def: InstRW<[SKLWriteResGroup18], (instr
"VPMASKMOVD(Y?)mr",
"VPMASKMOVQ(Y?)mr")>;
-def SKLWriteResGroup19 : SchedWriteRes<[SKLPort5,SKLPort01]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup19], (instregex "(V?)PSLLDrr",
- "(V?)PSLLQrr",
- "(V?)PSLLWrr",
- "(V?)PSRADrr",
- "(V?)PSRAWrr",
- "(V?)PSRLDrr",
- "(V?)PSRLQrr",
- "(V?)PSRLWrr")>;
-
def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -895,20 +871,6 @@ def SKLWriteResGroup51_16 : SchedWriteRe
}
def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>;
-def SKLWriteResGroup52 : SchedWriteRes<[SKLPort5,SKLPort01]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLDYrr",
- "VPSLLQYrr",
- "VPSLLWYrr",
- "VPSRADYrr",
- "VPSRAWYrr",
- "VPSRLDYrr",
- "VPSRLQYrr",
- "VPSRLWYrr")>;
-
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -1263,16 +1225,11 @@ def SKLWriteResGroup90 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PSLLDrm",
"(V?)PSLLQrm",
- "VPSLLVDrm",
- "VPSLLVQrm",
"(V?)PSLLWrm",
"(V?)PSRADrm",
- "VPSRAVDrm",
"(V?)PSRAWrm",
"(V?)PSRLDrm",
"(V?)PSRLQrm",
- "(V?)PSRLVDrm",
- "VPSRLVQrm",
"(V?)PSRLWrm")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
@@ -1431,25 +1388,6 @@ def: InstRW<[SKLWriteResGroup108], (inst
"VPMOVSXBQYrm",
"VPMOVSXWQYrm")>;
-def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm",
- "VPSLLQYrm",
- "VPSLLVDYrm",
- "VPSLLVQYrm",
- "VPSLLWYrm",
- "VPSRADYrm",
- "VPSRAVDYrm",
- "VPSRAWYrm",
- "VPSRLDYrm",
- "VPSRLQYrm",
- "VPSRLVDYrm",
- "VPSRLVQYrm",
- "VPSRLWYrm")>;
-
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
let NumMicroOps = 2;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Thu May 3 10:56:43 2018
@@ -199,7 +199,6 @@ defm : SKXWriteResPair<WriteVecALU, [S
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
-defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
@@ -218,6 +217,18 @@ defm : SKXWriteResPair<WritePSADBW, [SK
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
+// Vector integer shifts.
+defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
+defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
+
+defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
+defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
+defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
+defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
+
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
let Latency = 2;
@@ -353,7 +364,6 @@ defm : SKXWriteResPair<WriteFShuffle256,
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
-defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
@@ -474,116 +484,6 @@ def SKXWriteResGroup4 : SchedWriteRes<[S
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
-def SKXWriteResGroup5 : SchedWriteRes<[SKXPort01]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128ri",
- "VPROLDZ256ri",
- "VPROLDZri",
- "VPROLQZ128ri",
- "VPROLQZ256ri",
- "VPROLQZri",
- "VPROLVDZ128rr",
- "VPROLVDZ256rr",
- "VPROLVDZrr",
- "VPROLVQZ128rr",
- "VPROLVQZ256rr",
- "VPROLVQZrr",
- "VPRORDZ128ri",
- "VPRORDZ256ri",
- "VPRORDZri",
- "VPRORQZ128ri",
- "VPRORQZ256ri",
- "VPRORQZri",
- "VPRORVDZ128rr",
- "VPRORVDZ256rr",
- "VPRORVDZrr",
- "VPRORVQZ128rr",
- "VPRORVQZ256rr",
- "VPRORVQZrr",
- "(V?)PSLLDYri",
- "VPSLLDZ128ri",
- "VPSLLDZ256ri",
- "VPSLLDZri",
- "(V?)PSLLDri",
- "VPSLLQYri",
- "VPSLLQZ128ri",
- "VPSLLQZ256ri",
- "VPSLLQZri",
- "(V?)PSLLQri",
- "VPSLLVDYrr",
- "VPSLLVDZ128rr",
- "VPSLLVDZ256rr",
- "VPSLLVDZrr",
- "VPSLLVDrr",
- "VPSLLVQYrr",
- "VPSLLVQZ128rr",
- "VPSLLVQZ256rr",
- "VPSLLVQZrr",
- "VPSLLVQrr",
- "VPSLLVWZ128rr",
- "VPSLLVWZ256rr",
- "VPSLLVWZrr",
- "VPSLLWYri",
- "VPSLLWZ128ri",
- "VPSLLWZ256ri",
- "VPSLLWZri",
- "(V?)PSLLWri",
- "VPSRADYri",
- "VPSRADZ128ri",
- "VPSRADZ256ri",
- "VPSRADZri",
- "(V?)PSRADri",
- "VPSRAQZ128ri",
- "VPSRAQZ256ri",
- "VPSRAQZri",
- "VPSRAVDYrr",
- "VPSRAVDZ128rr",
- "VPSRAVDZ256rr",
- "VPSRAVDZrr",
- "VPSRAVDrr",
- "VPSRAVQZ128rr",
- "VPSRAVQZ256rr",
- "VPSRAVQZrr",
- "VPSRAVWZ128rr",
- "VPSRAVWZ256rr",
- "VPSRAVWZrr",
- "VPSRAWYri",
- "VPSRAWZ128ri",
- "VPSRAWZ256ri",
- "VPSRAWZri",
- "(V?)PSRAWri",
- "VPSRLDYri",
- "VPSRLDZ128ri",
- "VPSRLDZ256ri",
- "VPSRLDZri",
- "(V?)PSRLDri",
- "VPSRLQYri",
- "VPSRLQZ128ri",
- "VPSRLQZ256ri",
- "VPSRLQZri",
- "(V?)PSRLQri",
- "VPSRLVDYrr",
- "VPSRLVDZ128rr",
- "VPSRLVDZ256rr",
- "VPSRLVDZrr",
- "VPSRLVDrr",
- "VPSRLVQYrr",
- "VPSRLVQZ128rr",
- "VPSRLVQZ256rr",
- "VPSRLVQZrr",
- "VPSRLVQrr",
- "VPSRLVWZ128rr",
- "VPSRLVWZ256rr",
- "VPSRLVWZrr",
- "VPSRLWYri",
- "VPSRLWZ128ri",
- "VPSRLWZ256ri",
- "VPSRLWZri",
- "(V?)PSRLWri")>;
-
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
let NumMicroOps = 1;
@@ -915,28 +815,6 @@ def: InstRW<[SKXWriteResGroup18], (instr
"VPMASKMOVQYmr",
"VPMASKMOVQmr")>;
-def SKXWriteResGroup19 : SchedWriteRes<[SKXPort5,SKXPort01]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLDZ128rr",
- "(V?)PSLLDrr",
- "VPSLLQZ128rr",
- "(V?)PSLLQrr",
- "VPSLLWZ128rr",
- "(V?)PSLLWrr",
- "VPSRADZ128rr",
- "(V?)PSRADrr",
- "VPSRAQZ128rr",
- "VPSRAWZ128rr",
- "(V?)PSRAWrr",
- "VPSRLDZ128rr",
- "(V?)PSRLDrr",
- "VPSRLQZ128rr",
- "(V?)PSRLQrr",
- "(V?)PSRLWrr")>;
-
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1462,38 +1340,6 @@ def SKXWriteResGroup52_16 : SchedWriteRe
}
def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>;
-def SKXWriteResGroup53 : SchedWriteRes<[SKXPort5,SKXPort01]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDYrr",
- "VPSLLDZ256rr",
- "VPSLLDZrr",
- "VPSLLQYrr",
- "VPSLLQZ256rr",
- "VPSLLQZrr",
- "VPSLLWYrr",
- "VPSLLWZ256rr",
- "VPSLLWZrr",
- "VPSRADYrr",
- "VPSRADZ256rr",
- "VPSRADZrr",
- "VPSRAQZ256rr",
- "VPSRAQZrr",
- "VPSRAWYrr",
- "VPSRAWZ256rr",
- "VPSRAWZrr",
- "VPSRLDYrr",
- "VPSRLDZ256rr",
- "VPSRLDZrr",
- "VPSRLQYrr",
- "VPSRLQZ256rr",
- "VPSRLQZrr",
- "VPSRLWYrr",
- "VPSRLWZ256rr",
- "VPSRLWZrr")>;
-
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -2066,59 +1912,6 @@ def: InstRW<[SKXWriteResGroup93], (instr
"VCVTUQQ2PSZ256rr",
"VCVTUQQ2PSZrr")>;
-def SKXWriteResGroup94 : SchedWriteRes<[SKXPort01,SKXPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i",
- "VPROLQZ128m(b?)i",
- "VPROLVDZ128rm(b?)",
- "VPROLVQZ128rm(b?)",
- "VPRORDZ128m(b?)i",
- "VPRORQZ128m(b?)i",
- "VPRORVDZ128rm(b?)",
- "VPRORVQZ128rm(b?)",
- "VPSLLDZ128m(b?)i",
- "VPSLLDZ128rm(b?)",
- "(V?)PSLLDrm",
- "VPSLLQZ128m(b?)i",
- "VPSLLQZ128rm(b?)",
- "(V?)PSLLQrm",
- "VPSLLVDZ128rm(b?)",
- "VPSLLVDrm",
- "VPSLLVQZ128rm(b?)",
- "VPSLLVQrm",
- "VPSLLVWZ128rm(b?)",
- "VPSLLWZ128mi(b?)",
- "VPSLLWZ128rm(b?)",
- "(V?)PSLLWrm",
- "VPSRADZ128m(b?)i",
- "VPSRADZ128rm(b?)",
- "(V?)PSRADrm",
- "VPSRAQZ128m(b?)i",
- "VPSRAQZ128rm(b?)",
- "VPSRAVDZ128rm(b?)",
- "VPSRAVDrm",
- "VPSRAVQZ128rm(b?)",
- "VPSRAVWZ128rm(b?)",
- "VPSRAWZ128mi(b?)",
- "VPSRAWZ128rm(b?)",
- "(V?)PSRAWrm",
- "VPSRLDZ128m(b?)i",
- "VPSRLDZ128rm(b?)",
- "(V?)PSRLDrm",
- "VPSRLQZ128m(b?)i",
- "VPSRLQZ128rm(b?)",
- "(V?)PSRLQrm",
- "VPSRLVDZ128rm(b?)",
- "VPSRLVDrm",
- "VPSRLVQZ128rm(b?)",
- "VPSRLVQrm",
- "VPSRLVWZ128rm(b?)",
- "VPSRLWZ128mi(b?)",
- "VPSRLWZ128rm(b?)",
- "(V?)PSRLWrm")>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@@ -2406,95 +2199,6 @@ def: InstRW<[SKXWriteResGroup119], (inst
"VPMOVSXBQYrm",
"VPMOVSXWQYrm")>;
-def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i",
- "VPROLDZm(b?)i",
- "VPROLQZ256m(b?)i",
- "VPROLQZm(b?)i",
- "VPROLVDZ256rm(b?)",
- "VPROLVDZrm(b?)",
- "VPROLVQZ256rm(b?)",
- "VPROLVQZrm(b?)",
- "VPRORDZ256m(b?)i",
- "VPRORDZm(b?)i",
- "VPRORQZ256m(b?)i",
- "VPRORQZm(b?)i",
- "VPRORVDZ256rm(b?)",
- "VPRORVDZrm(b?)",
- "VPRORVQZ256rm(b?)",
- "VPRORVQZrm(b?)",
- "VPSLLDYrm",
- "VPSLLDZ256m(b?)i",
- "VPSLLDZ256rm(b?)",
- "VPSLLDZm(b?)i",
- "VPSLLDZrm(b?)",
- "VPSLLQYrm",
- "VPSLLQZ256m(b?)i",
- "VPSLLQZ256rm(b?)",
- "VPSLLQZm(b?)i",
- "VPSLLQZrm(b?)",
- "VPSLLVDYrm",
- "VPSLLVDZ256rm(b?)",
- "VPSLLVDZrm(b?)",
- "VPSLLVQYrm",
- "VPSLLVQZ256rm(b?)",
- "VPSLLVQZrm(b?)",
- "VPSLLVWZ256rm(b?)",
- "VPSLLVWZrm(b?)",
- "VPSLLWYrm",
- "VPSLLWZ256mi(b?)",
- "VPSLLWZ256rm(b?)",
- "VPSLLWZmi(b?)",
- "VPSLLWZrm(b?)",
- "VPSRADYrm",
- "VPSRADZ256m(b?)i",
- "VPSRADZ256rm(b?)",
- "VPSRADZm(b?)i",
- "VPSRADZrm(b?)",
- "VPSRAQZ256m(b?)i",
- "VPSRAQZ256rm(b?)",
- "VPSRAQZm(b?)i",
- "VPSRAQZrm(b?)",
- "VPSRAVDYrm",
- "VPSRAVDZ256rm(b?)",
- "VPSRAVDZrm(b?)",
- "VPSRAVQZ256rm(b?)",
- "VPSRAVQZrm(b?)",
- "VPSRAVWZ256rm(b?)",
- "VPSRAVWZrm(b?)",
- "VPSRAWYrm",
- "VPSRAWZ256mi(b?)",
- "VPSRAWZ256rm(b?)",
- "VPSRAWZmi(b?)",
- "VPSRAWZrm(b?)",
- "VPSRLDYrm",
- "VPSRLDZ256m(b?)i",
- "VPSRLDZ256rm(b?)",
- "VPSRLDZm(b?)i",
- "VPSRLDZrm(b?)",
- "VPSRLQYrm",
- "VPSRLQZ256m(b?)i",
- "VPSRLQZ256rm(b?)",
- "VPSRLQZm(b?)i",
- "VPSRLQZrm(b?)",
- "VPSRLVDYrm",
- "VPSRLVDZ256rm(b?)",
- "VPSRLVDZrm(b?)",
- "VPSRLVQYrm",
- "VPSRLVQZ256rm(b?)",
- "VPSRLVQZrm(b?)",
- "VPSRLVWZ256rm(b?)",
- "VPSRLVWZrm(b?)",
- "VPSRLWYrm",
- "VPSRLWZ256mi(b?)",
- "VPSRLWZ256rm(b?)",
- "VPSRLWZmi(b?)",
- "VPSRLWZrm(b?)")>;
-
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Thu May 3 10:56:43 2018
@@ -19,6 +19,17 @@ def ReadAfterLd : SchedRead;
// load + WriteRMW.
def WriteRMW : SchedWrite;
+// Helper defining a WriteRes for SchedRW on ExePorts with explicit Latency (Lat), ResourceCycles (Res) and NumMicroOps (UOps).
+multiclass X86WriteRes<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res, int UOps> {
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: With and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
@@ -137,7 +148,11 @@ defm WriteVecALU : X86SchedWritePair;
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
-defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
+defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
+defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM).
+defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
+defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
@@ -205,7 +220,8 @@ defm WriteFShuffle256 : X86SchedWritePai
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
-defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
+defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
+defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM).
// Old microcoded instructions that nobody use.
def WriteMicrocoded : SchedWrite;
@@ -258,11 +274,14 @@ def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
WriteVecLogicY, WriteVecLogicY>;
def SchedWriteVecShift
- : X86SchedWriteWidths<WriteVecShift, WriteVecShift,
- WriteVecShift, WriteVecShift>;
+ : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
+ WriteVecShiftY, WriteVecShiftY>;
+def SchedWriteVecShiftImm
+ : X86SchedWriteWidths<WriteVecShift, WriteVecShiftImmX,
+ WriteVecShiftImmY, WriteVecShiftImmY>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
- WriteVarVecShift, WriteVarVecShift>;
+ WriteVarVecShiftY, WriteVarVecShiftY>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMul,
WriteVecIMulY, WriteVecIMulY>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Thu May 3 10:56:43 2018
@@ -256,6 +256,10 @@ defm : AtomWriteResPair<WriteVecALUY,
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecShiftY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecShiftImmY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
@@ -276,6 +280,7 @@ defm : AtomWriteResPair<WriteVarBlendY,
defm : AtomWriteResPair<WriteShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarVecShiftY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu May 3 10:56:43 2018
@@ -406,6 +406,10 @@ def : WriteRes<WriteVecMove,
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftY, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftImmY,[JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
@@ -428,6 +432,7 @@ defm : JWriteResFpuPair<WriteVecLogicY,
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
+defm : JWriteResFpuPair<WriteVarVecShiftY,[JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Thu May 3 10:56:43 2018
@@ -162,7 +162,11 @@ def : WriteRes<WriteVecStore, [SL
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
-defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
@@ -288,7 +292,6 @@ def : WriteRes<WriteCLMulLd, [SLM_FPC_RS
let ResourceCycles = [10, 1];
}
-
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
@@ -306,7 +309,8 @@ defm : SLMWriteResPair<WriteFShuffle256,
defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle256, [SLM_FPC_RSV0], 1>;
-defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarVecShiftY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMAS, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Thu May 3 10:56:43 2018
@@ -235,6 +235,10 @@ def : WriteRes<WriteVecMove,
def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
@@ -258,7 +262,8 @@ defm : ZnWriteResFpuPair<WritePSADBWY,
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
// Vector Shift Operations
-defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
// Vector insert/extract operations.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Thu May 3 10:56:43 2018
@@ -5450,8 +5450,8 @@ declare <16 x i16> @llvm.x86.avx2.psign.
define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_pslld:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5534,8 +5534,8 @@ define <32 x i8> @test_pslldq(<32 x i8>
define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_psllq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5585,7 +5585,7 @@ define <4 x i32> @test_psllvd(<4 x i32>
; GENERIC-LABEL: test_psllvd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvd:
@@ -5628,7 +5628,7 @@ define <8 x i32> @test_psllvd_ymm(<8 x i
; GENERIC-LABEL: test_psllvd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvd_ymm:
@@ -5671,7 +5671,7 @@ define <2 x i64> @test_psllvq(<2 x i64>
; GENERIC-LABEL: test_psllvq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvq:
@@ -5714,7 +5714,7 @@ define <4 x i64> @test_psllvq_ymm(<4 x i
; GENERIC-LABEL: test_psllvq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psllvq_ymm:
@@ -5756,8 +5756,8 @@ declare <4 x i64> @llvm.x86.avx2.psllv.q
define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_psllw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5806,8 +5806,8 @@ declare <16 x i16> @llvm.x86.avx2.psll.w
define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psrad:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5857,7 +5857,7 @@ define <4 x i32> @test_psravd(<4 x i32>
; GENERIC-LABEL: test_psravd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psravd:
@@ -5900,7 +5900,7 @@ define <8 x i32> @test_psravd_ymm(<8 x i
; GENERIC-LABEL: test_psravd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psravd_ymm:
@@ -5942,8 +5942,8 @@ declare <8 x i32> @llvm.x86.avx2.psrav.d
define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_psraw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5992,8 +5992,8 @@ declare <16 x i16> @llvm.x86.avx2.psra.w
define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psrld:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6076,8 +6076,8 @@ define <32 x i8> @test_psrldq(<32 x i8>
define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_psrlq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6127,7 +6127,7 @@ define <4 x i32> @test_psrlvd(<4 x i32>
; GENERIC-LABEL: test_psrlvd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvd:
@@ -6170,7 +6170,7 @@ define <8 x i32> @test_psrlvd_ymm(<8 x i
; GENERIC-LABEL: test_psrlvd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvd_ymm:
@@ -6213,7 +6213,7 @@ define <2 x i64> @test_psrlvq(<2 x i64>
; GENERIC-LABEL: test_psrlvq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvq:
@@ -6256,7 +6256,7 @@ define <4 x i64> @test_psrlvq_ymm(<4 x i
; GENERIC-LABEL: test_psrlvq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psrlvq_ymm:
@@ -6298,8 +6298,8 @@ declare <4 x i64> @llvm.x86.avx2.psrlv.q
define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_psrlw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Thu May 3 10:56:43 2018
@@ -3404,8 +3404,8 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
@@ -3424,8 +3424,8 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Thu May 3 10:56:43 2018
@@ -2864,8 +2864,8 @@ declare <4 x i32> @llvm.x86.avx512.mask.
define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
@@ -2884,8 +2884,8 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
@@ -2904,8 +2904,8 @@ declare <2 x i64> @llvm.x86.avx512.mask.
define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
@@ -2924,8 +2924,8 @@ declare <4 x i64> @llvm.x86.avx512.mask.
define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
@@ -3024,8 +3024,8 @@ declare <4 x i32> @llvm.x86.avx512.mask.
define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
@@ -3044,8 +3044,8 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
@@ -3064,8 +3064,8 @@ declare <2 x i64> @llvm.x86.avx512.mask.
define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
@@ -3084,8 +3084,8 @@ declare <4 x i64> @llvm.x86.avx512.mask.
define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331472&r1=331471&r2=331472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Thu May 3 10:56:43 2018
@@ -869,22 +869,22 @@ define void @test_vprot(<2 x i64> %a0, <
; GENERIC-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -925,14 +925,14 @@ define void @test_vpsha(<2 x i64> %a0, <
; GENERIC-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -965,14 +965,14 @@ define void @test_vpshl(<2 x i64> %a0, <
; GENERIC-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list