[llvm] r331453 - [X86] Split WriteVecALU/WritePHAdd into XMM and YMM/ZMM scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu May 3 06:27:11 PDT 2018
Author: rksimon
Date: Thu May 3 06:27:10 2018
New Revision: 331453
URL: http://llvm.org/viewvc/llvm-project?rev=331453&view=rev
Log:
[X86] Split WriteVecALU/WritePHAdd into XMM and YMM/ZMM scheduler classes
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu May 3 06:27:10 2018
@@ -4684,16 +4684,16 @@ let ImmT = NoImm, Predicates = [HasAVX]
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V;
+ SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
@@ -4705,10 +4705,10 @@ let isCommutable = 0 in {
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
}
@@ -4730,16 +4730,16 @@ let ImmT = NoImm, Predicates = [HasAVX2]
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -4748,10 +4748,10 @@ let isCommutable = 0 in {
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -4759,13 +4759,13 @@ let isCommutable = 0 in {
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, memopv2i64>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
@@ -4776,10 +4776,10 @@ let isCommutable = 0 in {
memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- WritePHAdd, memopv2i64>;
+ SchedWritePHAdd.XMM, memopv2i64>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- WritePHAdd, memopv2i64>;
+ SchedWritePHAdd.XMM, memopv2i64>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, memopv2i64, i128mem,
SchedWriteVecIMul.XMM>;
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Thu May 3 06:27:10 2018
@@ -199,7 +199,8 @@ def : WriteRes<WriteVecLoad, [BW
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteVecMove, [BWPort015]>;
-defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
@@ -365,9 +366,10 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3>;
+defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3, 5>;
defm : BWWriteResPair<WriteFHAddY, [BWPort1,BWPort5], 5, [1,2], 3, 6>;
-defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3>;
+defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
+defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
// Remaining instrs.
@@ -1087,55 +1089,6 @@ def: InstRW<[BWWriteResGroup74], (instre
"FCOMP32m",
"FCOMP64m")>;
-def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup76], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDBYrm",
- "VPADDDYrm",
- "VPADDQYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPADDWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSUBBYrm",
- "VPSUBDYrm",
- "VPSUBQYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm",
- "VPSUBWYrm")>;
-
def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -1415,18 +1368,6 @@ def: InstRW<[BWWriteResGroup109], (instr
"VPSRAVDYrm",
"VPSRLVDYrm")>;
-def BWWriteResGroup110 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[BWWriteResGroup110], (instregex "VPHADDDYrm",
- "VPHADDSWYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBSWYrm",
- "VPHSUBWYrm")>;
-
def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Thu May 3 06:27:10 2018
@@ -197,7 +197,8 @@ def : WriteRes<WriteVecMove, [HW
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
+defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
@@ -608,7 +609,8 @@ def : InstRW<[HWWriteFXTRACT], (instrs F
defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
//=== Floating Point XMM and YMM Instructions ===//
@@ -940,105 +942,14 @@ def: InstRW<[HWWriteResGroup16], (instre
"BLSI(32|64)rm",
"BLSMSK(32|64)rm",
"BLSR(32|64)rm",
- "MOVBE(16|32|64)rm")>;
-
-def HWWriteResGroup16_1 : SchedWriteRes<[HWPort23,HWPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup16_1], (instregex "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PADDBrm",
- "(V?)PADDDrm",
- "(V?)PADDQrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PADDWrm",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSUBBrm",
- "(V?)PSUBDrm",
- "(V?)PSUBQrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm",
- "(V?)PSUBWrm")>;
-
-def HWWriteResGroup16_2 : SchedWriteRes<[HWPort23,HWPort15]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDBYrm",
- "VPADDDYrm",
- "VPADDQYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPADDWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSUBBYrm",
- "VPSUBDYrm",
- "VPSUBQYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm",
- "VPSUBWYrm")>;
+ "MOVBE(16|32|64)rm",
+ "MMX_PABS(B|D|W)rm",
+ "MMX_P(ADD|SUB)(B|D|W|Q)irm",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irm",
+ "MMX_PAVG(B|W)irm",
+ "MMX_PCMP(EQ|GT)(B|D|W)irm",
+ "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PSIGN(B|D|W)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 7;
@@ -1514,18 +1425,6 @@ def HWWriteResGroup64 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-def HWWriteResGroup64_1 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDDYrm",
- "VPHADDSWYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBSWYrm",
- "VPHSUBWYrm")>;
-
def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Thu May 3 06:27:10 2018
@@ -177,7 +177,8 @@ def : WriteRes<WriteVecMove, [SB
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
+defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
@@ -222,6 +223,7 @@ def : WriteRes<WriteVecExtractSt, [SBPor
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
+defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
////////////////////////////////////////////////////////////////////////////////
// String instructions.
@@ -406,47 +408,12 @@ def SBWriteResGroup5 : SchedWriteRes<[SB
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
- "MMX_PADDQirr",
- "MMX_PALIGNRrri",
- "MMX_PSIGN(B|D|W)rr",
- "(V?)PABSBrr",
- "(V?)PABSDrr",
- "(V?)PABSWrr",
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri",
"(V?)PACKSSDWrr",
"(V?)PACKSSWBrr",
"(V?)PACKUSDWrr",
"(V?)PACKUSWBrr",
- "(V?)PADDBrr",
- "(V?)PADDDrr",
- "(V?)PADDQrr",
- "(V?)PADDSBrr",
- "(V?)PADDSWrr",
- "(V?)PADDUSBrr",
- "(V?)PADDUSWrr",
- "(V?)PADDWrr",
"(V?)PALIGNRrri",
- "(V?)PAVGBrr",
- "(V?)PAVGWrr",
- "(V?)PCMPEQBrr",
- "(V?)PCMPEQDrr",
- "(V?)PCMPEQQrr",
- "(V?)PCMPEQWrr",
- "(V?)PCMPGTBrr",
- "(V?)PCMPGTDrr",
- "(V?)PCMPGTWrr",
- "(V?)PMAXSBrr",
- "(V?)PMAXSDrr",
- "(V?)PMAXSWrr",
- "(V?)PMAXUBrr",
- "(V?)PMAXUDrr",
- "(V?)PMAXUWrr",
- "(V?)PMINSBrr",
- "(V?)PMINSDrr",
- "(V?)PMINSWrr",
- "(V?)PMINUBrr",
- "(V?)PMINUDrr",
- "(V?)PMINUWrr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -462,19 +429,8 @@ def: InstRW<[SBWriteResGroup5], (instreg
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
- "(V?)PSIGNBrr",
- "(V?)PSIGNDrr",
- "(V?)PSIGNWrr",
"(V?)PSLLDQri",
"(V?)PSRLDQri",
- "(V?)PSUBBrr",
- "(V?)PSUBDrr",
- "(V?)PSUBQrr",
- "(V?)PSUBSBrr",
- "(V?)PSUBSWrr",
- "(V?)PSUBUSBrr",
- "(V?)PSUBUSWrr",
- "(V?)PSUBWrr",
"(V?)PUNPCKHBWrr",
"(V?)PUNPCKHDQrr",
"(V?)PUNPCKHQDQrr",
@@ -604,6 +560,12 @@ def SBWriteResGroup21 : SchedWriteRes<[S
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr",
+ "MMX_PADD(B|D|W)irr",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irr",
+ "MMX_PAVG(B|W)irr",
+ "MMX_PCMP(EQ|GT)(B|D|W)irr",
+ "MMX_P(MAX|MIN)(SW|UB)irr",
+ "MMX_PSUB(B|D|Q|W)irr",
"PUSHFS64",
"(V?)CVTDQ2PS(Y?)rr")>;
@@ -954,44 +916,11 @@ def SBWriteResGroup59 : SchedWriteRes<[S
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
- "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PACKSSDWrm",
+def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
- "(V?)PADDBrm",
- "(V?)PADDDrm",
- "(V?)PADDQrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PADDWrm",
"(V?)PALIGNRrmi",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
"(V?)PMOVSXBDrm",
"(V?)PMOVSXBQrm",
"(V?)PMOVSXBWrm",
@@ -1007,17 +936,6 @@ def: InstRW<[SBWriteResGroup59], (instre
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
"(V?)PSHUFLWmi",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSUBBrm",
- "(V?)PSUBDrm",
- "(V?)PSUBQrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm",
- "(V?)PSUBWrm",
"(V?)PUNPCKHBWrm",
"(V?)PUNPCKHDQrm",
"(V?)PUNPCKHQDQrm",
@@ -1027,6 +945,18 @@ def: InstRW<[SBWriteResGroup59], (instre
"(V?)PUNPCKLQDQrm",
"(V?)PUNPCKLWDrm")>;
+def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irm",
+ "MMX_PAVG(B|W)irm",
+ "MMX_PCMP(EQ|GT)(B|D|W)irm",
+ "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PSUB(B|D|Q|W)irm")>;
+
def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
let Latency = 7;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Thu May 3 06:27:10 2018
@@ -195,7 +195,8 @@ def : WriteRes<WriteVecLoad, [S
def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteVecMove, [SKLPort015]>;
-defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
@@ -373,7 +374,8 @@ def : WriteRes<WriteNop, []>;
defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
-defm : SKLWriteResPair<WritePHAdd, [SKLPort15], 1>;
+defm : SKLWriteResPair<WritePHAdd, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
+defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -429,38 +431,7 @@ def SKLWriteResGroup5 : SchedWriteRes<[S
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PABSB(Y?)rr",
- "(V?)PABSD(Y?)rr",
- "(V?)PABSW(Y?)rr",
- "(V?)PADDSB(Y?)rr",
- "(V?)PADDSW(Y?)rr",
- "(V?)PADDUSB(Y?)rr",
- "(V?)PADDUSW(Y?)rr",
- "(V?)PAVGB(Y?)rr",
- "(V?)PAVGW(Y?)rr",
- "(V?)PCMPEQB(Y?)rr",
- "(V?)PCMPEQD(Y?)rr",
- "(V?)PCMPEQQ(Y?)rr",
- "(V?)PCMPEQW(Y?)rr",
- "(V?)PCMPGTB(Y?)rr",
- "(V?)PCMPGTD(Y?)rr",
- "(V?)PCMPGTW(Y?)rr",
- "(V?)PMAXSB(Y?)rr",
- "(V?)PMAXSD(Y?)rr",
- "(V?)PMAXSW(Y?)rr",
- "(V?)PMAXUB(Y?)rr",
- "(V?)PMAXUD(Y?)rr",
- "(V?)PMAXUW(Y?)rr",
- "(V?)PMINSB(Y?)rr",
- "(V?)PMINSD(Y?)rr",
- "(V?)PMINSW(Y?)rr",
- "(V?)PMINUB(Y?)rr",
- "(V?)PMINUD(Y?)rr",
- "(V?)PMINUW(Y?)rr",
- "(V?)PSIGNB(Y?)rr",
- "(V?)PSIGND(Y?)rr",
- "(V?)PSIGNW(Y?)rr",
- "(V?)PSLLD(Y?)ri",
+def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PSLLD(Y?)ri",
"(V?)PSLLQ(Y?)ri",
"VPSLLVD(Y?)rr",
"VPSLLVQ(Y?)rr",
@@ -472,11 +443,7 @@ def: InstRW<[SKLWriteResGroup5], (instre
"(V?)PSRLQ(Y?)ri",
"VPSRLVD(Y?)rr",
"VPSRLVQ(Y?)rr",
- "(V?)PSRLW(Y?)ri",
- "(V?)PSUBSB(Y?)rr",
- "(V?)PSUBSW(Y?)rr",
- "(V?)PSUBUSB(Y?)rr",
- "(V?)PSUBUSW(Y?)rr")>;
+ "(V?)PSRLW(Y?)ri")>;
def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> {
let Latency = 1;
@@ -828,16 +795,6 @@ def SKLWriteResGroup37 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKLWriteResGroup38], (instregex "(V?)PHADDD(Y?)rr",
- "(V?)PHADDW(Y?)rr",
- "(V?)PHSUBD(Y?)rr",
- "(V?)PHSUBW(Y?)rr")>;
-
def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -1304,38 +1261,7 @@ def SKLWriteResGroup90 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSLLDrm",
+def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PSLLDrm",
"(V?)PSLLQrm",
"VPSLLVDrm",
"VPSLLVQrm",
@@ -1347,11 +1273,7 @@ def: InstRW<[SKLWriteResGroup90], (instr
"(V?)PSRLQrm",
"(V?)PSRLVDrm",
"VPSRLVQrm",
- "(V?)PSRLWrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm")>;
+ "(V?)PSRLWrm")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
@@ -1514,38 +1436,7 @@ def SKLWriteResGroup109 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSLLDYrm",
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm",
"VPSLLQYrm",
"VPSLLVDYrm",
"VPSLLVQYrm",
@@ -1557,11 +1448,7 @@ def: InstRW<[SKLWriteResGroup109], (inst
"VPSRLQYrm",
"VPSRLVDYrm",
"VPSRLVQYrm",
- "VPSRLWYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm")>;
+ "VPSRLWYrm")>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
@@ -1725,16 +1612,6 @@ def SKLWriteResGroup128 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup129], (instregex "(V?)PHADDDrm",
- "(V?)PHADDWrm",
- "(V?)PHSUBDrm",
- "(V?)PHSUBWrm")>;
-
def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1807,16 +1684,6 @@ def SKLWriteResGroup140 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
-def SKLWriteResGroup141 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBWYrm")>;
-
def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Thu May 3 06:27:10 2018
@@ -195,7 +195,8 @@ def : WriteRes<WriteVecLoad, [S
def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteVecMove, [SKXPort015]>;
-defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
@@ -373,7 +374,8 @@ def : WriteRes<WriteNop, []>;
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
-defm : SKXWriteResPair<WritePHAdd, [SKXPort15], 1>;
+defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
+defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -477,122 +479,7 @@ def SKXWriteResGroup5 : SchedWriteRes<[S
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr",
- "VPABSBZ128rr",
- "VPABSBZ256rr",
- "VPABSBZrr",
- "(V?)PABSBrr",
- "VPABSDYrr",
- "VPABSDZ128rr",
- "VPABSDZ256rr",
- "VPABSDZrr",
- "(V?)PABSDrr",
- "VPABSQZ128rr",
- "VPABSQZ256rr",
- "VPABSQZrr",
- "VPABSWYrr",
- "VPABSWZ128rr",
- "VPABSWZ256rr",
- "VPABSWZrr",
- "(V?)PABSWrr",
- "VPADDSBYrr",
- "VPADDSBZ128rr",
- "VPADDSBZ256rr",
- "VPADDSBZrr",
- "(V?)PADDSBrr",
- "VPADDSWYrr",
- "VPADDSWZ128rr",
- "VPADDSWZ256rr",
- "VPADDSWZrr",
- "(V?)PADDSWrr",
- "VPADDUSBYrr",
- "VPADDUSBZ128rr",
- "VPADDUSBZ256rr",
- "VPADDUSBZrr",
- "(V?)PADDUSBrr",
- "VPADDUSWYrr",
- "VPADDUSWZ128rr",
- "VPADDUSWZ256rr",
- "VPADDUSWZrr",
- "(V?)PADDUSWrr",
- "VPAVGBYrr",
- "VPAVGBZ128rr",
- "VPAVGBZ256rr",
- "VPAVGBZrr",
- "(V?)PAVGBrr",
- "VPAVGWYrr",
- "VPAVGWZ128rr",
- "VPAVGWZ256rr",
- "VPAVGWZrr",
- "(V?)PAVGWrr",
- "(V?)PCMPEQB(Y?)rr",
- "(V?)PCMPEQD(Y?)rr",
- "(V?)PCMPEQQ(Y?)rr",
- "(V?)PCMPEQW(Y?)rr",
- "(V?)PCMPGTB(Y?)rr",
- "(V?)PCMPGTD(Y?)rr",
- "(V?)PCMPGTW(Y?)rr",
- "VPMAXSBYrr",
- "VPMAXSBZ128rr",
- "VPMAXSBZ256rr",
- "VPMAXSBZrr",
- "(V?)PMAXSBrr",
- "VPMAXSDYrr",
- "VPMAXSDZ128rr",
- "VPMAXSDZ256rr",
- "VPMAXSDZrr",
- "(V?)PMAXSDrr",
- "VPMAXSWYrr",
- "VPMAXSWZ128rr",
- "VPMAXSWZ256rr",
- "VPMAXSWZrr",
- "(V?)PMAXSWrr",
- "VPMAXUBYrr",
- "VPMAXUBZ128rr",
- "VPMAXUBZ256rr",
- "VPMAXUBZrr",
- "(V?)PMAXUBrr",
- "VPMAXUDYrr",
- "VPMAXUDZ128rr",
- "VPMAXUDZ256rr",
- "VPMAXUDZrr",
- "(V?)PMAXUDrr",
- "VPMAXUWYrr",
- "VPMAXUWZ128rr",
- "VPMAXUWZ256rr",
- "VPMAXUWZrr",
- "(V?)PMAXUWrr",
- "VPMINSBYrr",
- "VPMINSBZ128rr",
- "VPMINSBZ256rr",
- "VPMINSBZrr",
- "(V?)PMINSBrr",
- "VPMINSDYrr",
- "VPMINSDZ128rr",
- "VPMINSDZ256rr",
- "VPMINSDZrr",
- "(V?)PMINSDrr",
- "VPMINSWYrr",
- "VPMINSWZ128rr",
- "VPMINSWZ256rr",
- "VPMINSWZrr",
- "(V?)PMINSWrr",
- "VPMINUBYrr",
- "VPMINUBZ128rr",
- "VPMINUBZ256rr",
- "VPMINUBZrr",
- "(V?)PMINUBrr",
- "VPMINUDYrr",
- "VPMINUDZ128rr",
- "VPMINUDZ256rr",
- "VPMINUDZrr",
- "(V?)PMINUDrr",
- "VPMINUWYrr",
- "VPMINUWZ128rr",
- "VPMINUWZ256rr",
- "VPMINUWZrr",
- "(V?)PMINUWrr",
- "VPROLDZ128ri",
+def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128ri",
"VPROLDZ256ri",
"VPROLDZri",
"VPROLQZ128ri",
@@ -616,9 +503,6 @@ def: InstRW<[SKXWriteResGroup5], (instre
"VPRORVQZ128rr",
"VPRORVQZ256rr",
"VPRORVQZrr",
- "(V?)PSIGNB(Y?)rr",
- "(V?)PSIGND(Y?)rr",
- "(V?)PSIGNW(Y?)rr",
"(V?)PSLLDYri",
"VPSLLDZ128ri",
"VPSLLDZ256ri",
@@ -698,27 +582,7 @@ def: InstRW<[SKXWriteResGroup5], (instre
"VPSRLWZ128ri",
"VPSRLWZ256ri",
"VPSRLWZri",
- "(V?)PSRLWri",
- "VPSUBSBYrr",
- "VPSUBSBZ128rr",
- "VPSUBSBZ256rr",
- "VPSUBSBZrr",
- "(V?)PSUBSBrr",
- "VPSUBSWYrr",
- "VPSUBSWZ128rr",
- "VPSUBSWZ256rr",
- "VPSUBSWZrr",
- "(V?)PSUBSWrr",
- "VPSUBUSBYrr",
- "VPSUBUSBZ128rr",
- "VPSUBUSBZ256rr",
- "VPSUBUSBZrr",
- "(V?)PSUBUSBrr",
- "VPSUBUSWYrr",
- "VPSUBUSWZ128rr",
- "VPSUBUSWZ256rr",
- "VPSUBUSWZrr",
- "(V?)PSUBUSWrr")>;
+ "(V?)PSRLWri")>;
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
@@ -1384,13 +1248,6 @@ def SKXWriteResGroup39 : SchedWriteRes<[
}
def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-def SKXWriteResGroup40 : SchedWriteRes<[SKXPort5,SKXPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKXWriteResGroup40], (instregex "(V?)PH(ADD|SUB)(D|W)(Y?)rr")>;
-
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -2214,57 +2071,7 @@ def SKXWriteResGroup94 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)",
- "(V?)PABSBrm",
- "VPABSDZ128rm(b?)",
- "(V?)PABSDrm",
- "VPABSQZ128rm(b?)",
- "VPABSWZ128rm(b?)",
- "(V?)PABSWrm",
- "VPADDSBZ128rm(b?)",
- "(V?)PADDSBrm",
- "VPADDSWZ128rm(b?)",
- "(V?)PADDSWrm",
- "VPADDUSBZ128rm(b?)",
- "(V?)PADDUSBrm",
- "VPADDUSWZ128rm(b?)",
- "(V?)PADDUSWrm",
- "VPAVGBZ128rm(b?)",
- "(V?)PAVGBrm",
- "VPAVGWZ128rm(b?)",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "VPMAXSBZ128rm(b?)",
- "(V?)PMAXSBrm",
- "VPMAXSDZ128rm(b?)",
- "(V?)PMAXSDrm",
- "VPMAXSWZ128rm(b?)",
- "(V?)PMAXSWrm",
- "VPMAXUBZ128rm(b?)",
- "(V?)PMAXUBrm",
- "VPMAXUDZ128rm(b?)",
- "(V?)PMAXUDrm",
- "VPMAXUWZ128rm(b?)",
- "(V?)PMAXUWrm",
- "VPMINSBZ128rm(b?)",
- "(V?)PMINSBrm",
- "VPMINSDZ128rm(b?)",
- "(V?)PMINSDrm",
- "VPMINSWZ128rm(b?)",
- "(V?)PMINSWrm",
- "VPMINUBZ128rm(b?)",
- "(V?)PMINUBrm",
- "VPMINUDZ128rm(b?)",
- "(V?)PMINUDrm",
- "VPMINUWZ128rm(b?)",
- "(V?)PMINUWrm",
- "VPROLDZ128m(b?)i",
+def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i",
"VPROLQZ128m(b?)i",
"VPROLVDZ128rm(b?)",
"VPROLVQZ128rm(b?)",
@@ -2272,9 +2079,6 @@ def: InstRW<[SKXWriteResGroup94], (instr
"VPRORQZ128m(b?)i",
"VPRORVDZ128rm(b?)",
"VPRORVQZ128rm(b?)",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
"VPSLLDZ128m(b?)i",
"VPSLLDZ128rm(b?)",
"(V?)PSLLDrm",
@@ -2314,15 +2118,7 @@ def: InstRW<[SKXWriteResGroup94], (instr
"VPSRLVWZ128rm(b?)",
"VPSRLWZ128mi(b?)",
"VPSRLWZ128rm(b?)",
- "(V?)PSRLWrm",
- "VPSUBSBZ128rm(b?)",
- "(V?)PSUBSBrm",
- "VPSUBSWZ128rm(b?)",
- "(V?)PSUBSWrm",
- "VPSUBUSBZ128rm(b?)",
- "(V?)PSUBUSBrm",
- "VPSUBUSWZ128rm(b?)",
- "(V?)PSUBUSWrm")>;
+ "(V?)PSRLWrm")>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@@ -2615,79 +2411,7 @@ def SKXWriteResGroup120 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm",
- "VPABSBZ256rm(b?)",
- "VPABSBZrm(b?)",
- "VPABSDYrm",
- "VPABSDZ256rm(b?)",
- "VPABSDZrm(b?)",
- "VPABSQZ256rm(b?)",
- "VPABSQZrm(b?)",
- "VPABSWYrm",
- "VPABSWZ256rm(b?)",
- "VPABSWZrm(b?)",
- "VPADDSBYrm",
- "VPADDSBZ256rm(b?)",
- "VPADDSBZrm(b?)",
- "VPADDSWYrm",
- "VPADDSWZ256rm(b?)",
- "VPADDSWZrm(b?)",
- "VPADDUSBYrm",
- "VPADDUSBZ256rm(b?)",
- "VPADDUSBZrm(b?)",
- "VPADDUSWYrm",
- "VPADDUSWZ256rm(b?)",
- "VPADDUSWZrm(b?)",
- "VPAVGBYrm",
- "VPAVGBZ256rm(b?)",
- "VPAVGBZrm(b?)",
- "VPAVGWYrm",
- "VPAVGWZ256rm(b?)",
- "VPAVGWZrm(b?)",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSBZ256rm(b?)",
- "VPMAXSBZrm(b?)",
- "VPMAXSDYrm",
- "VPMAXSDZ256rm(b?)",
- "VPMAXSDZrm(b?)",
- "VPMAXSWYrm",
- "VPMAXSWZ256rm(b?)",
- "VPMAXSWZrm(b?)",
- "VPMAXUBYrm",
- "VPMAXUBZ256rm(b?)",
- "VPMAXUBZrm(b?)",
- "VPMAXUDYrm",
- "VPMAXUDZ256rm(b?)",
- "VPMAXUDZrm(b?)",
- "VPMAXUWYrm",
- "VPMAXUWZ256rm(b?)",
- "VPMAXUWZrm(b?)",
- "VPMINSBYrm",
- "VPMINSBZ256rm(b?)",
- "VPMINSBZrm(b?)",
- "VPMINSDYrm",
- "VPMINSDZ256rm(b?)",
- "VPMINSDZrm(b?)",
- "VPMINSWYrm",
- "VPMINSWZ256rm(b?)",
- "VPMINSWZrm(b?)",
- "VPMINUBYrm",
- "VPMINUBZ256rm(b?)",
- "VPMINUBZrm(b?)",
- "VPMINUDYrm",
- "VPMINUDZ256rm(b?)",
- "VPMINUDZrm(b?)",
- "VPMINUWYrm",
- "VPMINUWZ256rm(b?)",
- "VPMINUWZrm(b?)",
- "VPROLDZ256m(b?)i",
+def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i",
"VPROLDZm(b?)i",
"VPROLQZ256m(b?)i",
"VPROLQZm(b?)i",
@@ -2703,9 +2427,6 @@ def: InstRW<[SKXWriteResGroup120], (inst
"VPRORVDZrm(b?)",
"VPRORVQZ256rm(b?)",
"VPRORVQZrm(b?)",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
"VPSLLDYrm",
"VPSLLDZ256m(b?)i",
"VPSLLDZ256rm(b?)",
@@ -2772,19 +2493,7 @@ def: InstRW<[SKXWriteResGroup120], (inst
"VPSRLWZ256mi(b?)",
"VPSRLWZ256rm(b?)",
"VPSRLWZmi(b?)",
- "VPSRLWZrm(b?)",
- "VPSUBSBYrm",
- "VPSUBSBZ256rm(b?)",
- "VPSUBSBZrm(b?)",
- "VPSUBSWYrm",
- "VPSUBSWZ256rm(b?)",
- "VPSUBSWZrm(b?)",
- "VPSUBUSBYrm",
- "VPSUBUSBZ256rm(b?)",
- "VPSUBUSBZrm(b?)",
- "VPSUBUSWYrm",
- "VPSUBUSWZ256rm(b?)",
- "VPSUBUSWZrm(b?)")>;
+ "VPSRLWZrm(b?)")>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
@@ -3138,16 +2847,6 @@ def SKXWriteResGroup143 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKXWriteResGroup144 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup144], (instregex "(V?)PHADDDrm",
- "(V?)PHADDWrm",
- "(V?)PHSUBDrm",
- "(V?)PHSUBWrm")>;
-
def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -3315,16 +3014,6 @@ def SKXWriteResGroup154 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
-def SKXWriteResGroup155 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDDYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBWYrm")>;
-
def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Thu May 3 06:27:10 2018
@@ -127,12 +127,14 @@ class FMASC { X86FoldableSchedWrite Sche
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
defm WritePHAdd : X86SchedWritePair;
+defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecMove : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
@@ -249,9 +251,9 @@ def SchedWriteFVarBlend
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteVecALU
- : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>;
+ : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd
- : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAdd, WritePHAdd>;
+ : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAddY, WritePHAddY>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
WriteVecLogicY, WriteVecLogicY>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Thu May 3 06:27:10 2018
@@ -252,6 +252,7 @@ def : WriteRes<WriteVecStore, [AtomPort
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
@@ -316,6 +317,7 @@ defm : AtomWriteResPair<WriteAESDecEnc,
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu May 3 06:27:10 2018
@@ -404,6 +404,7 @@ def : WriteRes<WriteVecStore, [JS
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
@@ -468,6 +469,7 @@ defm : JWriteResFpuPair<WriteAESDecEnc,
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Thu May 3 06:27:10 2018
@@ -166,6 +166,7 @@ defm : SLMWriteResPair<WriteVecShift, [S
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
// FIXME: The below is closer to correct, but caused some perf regressions.
@@ -201,6 +202,7 @@ def : WriteRes<WriteVecExtractSt, [SLM_
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Thu May 3 06:27:10 2018
@@ -238,7 +238,9 @@ defm : ZnWriteResFpuPair<WriteVecShift,
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WritePHAddY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Thu May 3 06:27:10 2018
@@ -10,7 +10,7 @@ define <8 x i32> @test_broadcasti128(<8
; GENERIC-LABEL: test_broadcasti128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_broadcasti128:
@@ -174,8 +174,8 @@ define <8 x float> @test_broadcastss_ymm
define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_extracti128:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
@@ -524,7 +524,7 @@ define <8 x i32> @test_inserti128(<8 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_inserti128:
@@ -652,8 +652,8 @@ declare <16 x i16> @llvm.x86.avx2.mpsadb
define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
; GENERIC-LABEL: test_pabsb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:1.00]
+; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -702,8 +702,8 @@ declare <32 x i8> @llvm.x86.avx2.pabs.b(
define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC-LABEL: test_pabsd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:1.00]
+; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -752,8 +752,8 @@ declare <8 x i32> @llvm.x86.avx2.pabs.d(
define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pabsw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:1.00]
+; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -978,8 +978,8 @@ declare <32 x i8> @llvm.x86.avx2.packusw
define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddb:
@@ -1020,8 +1020,8 @@ define <32 x i8> @test_paddb(<32 x i8> %
define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_paddd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddd:
@@ -1062,8 +1062,8 @@ define <8 x i32> @test_paddd(<8 x i32> %
define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_paddq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddq:
@@ -1104,8 +1104,8 @@ define <4 x i64> @test_paddq(<4 x i64> %
define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddsb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsb:
@@ -1147,8 +1147,8 @@ declare <32 x i8> @llvm.x86.avx2.padds.b
define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddsw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsw:
@@ -1190,8 +1190,8 @@ declare <16 x i16> @llvm.x86.avx2.padds.
define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddusb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusb:
@@ -1233,8 +1233,8 @@ declare <32 x i8> @llvm.x86.avx2.paddus.
define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddusw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusw:
@@ -1276,8 +1276,8 @@ declare <16 x i16> @llvm.x86.avx2.paddus
define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddw:
@@ -1320,7 +1320,7 @@ define <32 x i8> @test_palignr(<32 x i8>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_palignr:
@@ -1369,7 +1369,7 @@ define <4 x i64> @test_pand(<4 x i64> %a
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pand:
@@ -1418,7 +1418,7 @@ define <4 x i64> @test_pandn(<4 x i64> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pandn:
@@ -1467,8 +1467,8 @@ define <4 x i64> @test_pandn(<4 x i64> %
define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pavgb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pavgb:
@@ -1519,8 +1519,8 @@ define <32 x i8> @test_pavgb(<32 x i8> %
define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pavgw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pavgw:
@@ -1622,7 +1622,7 @@ define <8 x i32> @test_pblendd_ymm(<8 x
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pblendd_ymm:
@@ -1714,7 +1714,7 @@ define <16 x i16> @test_pblendw(<16 x i1
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pblendw:
@@ -1812,7 +1812,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastb_ymm:
@@ -1910,7 +1910,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastd_ymm:
@@ -2008,7 +2008,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastq_ymm:
@@ -2106,7 +2106,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastw_ymm:
@@ -2153,8 +2153,8 @@ define <16 x i16> @test_pbroadcastw_ymm(
define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpeqb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqb:
@@ -2197,8 +2197,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8>
define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpeqd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqd:
@@ -2241,8 +2241,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32>
define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpeqq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqq:
@@ -2285,8 +2285,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64>
define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpeqw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqw:
@@ -2329,8 +2329,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i1
define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpgtb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtb:
@@ -2373,8 +2373,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8>
define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpgtd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtd:
@@ -2417,8 +2417,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32>
define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpgtq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
@@ -2461,8 +2461,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64>
define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpgtw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtw:
@@ -2507,7 +2507,7 @@ define <4 x i64> @test_perm2i128(<4 x i6
; GENERIC: # %bb.0:
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_perm2i128:
@@ -2556,7 +2556,7 @@ define <8 x i32> @test_permd(<8 x i32> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permd:
@@ -2705,7 +2705,7 @@ define <4 x i64> @test_permq(<4 x i64> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permq:
@@ -3039,7 +3039,7 @@ define <8 x i32> @test_phaddd(<8 x i32>
; GENERIC-LABEL: test_phaddd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddd:
@@ -3082,7 +3082,7 @@ define <16 x i16> @test_phaddsw(<16 x i1
; GENERIC-LABEL: test_phaddsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddsw:
@@ -3125,7 +3125,7 @@ define <16 x i16> @test_phaddw(<16 x i16
; GENERIC-LABEL: test_phaddw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddw:
@@ -3168,7 +3168,7 @@ define <8 x i32> @test_phsubd(<8 x i32>
; GENERIC-LABEL: test_phsubd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubd:
@@ -3211,7 +3211,7 @@ define <16 x i16> @test_phsubsw(<16 x i1
; GENERIC-LABEL: test_phsubsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubsw:
@@ -3254,7 +3254,7 @@ define <16 x i16> @test_phsubw(<16 x i16
; GENERIC-LABEL: test_phsubw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
+; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubw:
@@ -3580,8 +3580,8 @@ declare void @llvm.x86.avx2.maskstore.q.
define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pmaxsb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsb:
@@ -3623,8 +3623,8 @@ declare <32 x i8> @llvm.x86.avx2.pmaxs.b
define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmaxsd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsd:
@@ -3666,8 +3666,8 @@ declare <8 x i32> @llvm.x86.avx2.pmaxs.d
define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmaxsw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsw:
@@ -3709,8 +3709,8 @@ declare <16 x i16> @llvm.x86.avx2.pmaxs.
define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pmaxub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxub:
@@ -3752,8 +3752,8 @@ declare <32 x i8> @llvm.x86.avx2.pmaxu.b
define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmaxud:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxud:
@@ -3795,8 +3795,8 @@ declare <8 x i32> @llvm.x86.avx2.pmaxu.d
define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmaxuw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxuw:
@@ -3838,8 +3838,8 @@ declare <16 x i16> @llvm.x86.avx2.pmaxu.
define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pminsb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsb:
@@ -3881,8 +3881,8 @@ declare <32 x i8> @llvm.x86.avx2.pmins.b
define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pminsd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsd:
@@ -3924,8 +3924,8 @@ declare <8 x i32> @llvm.x86.avx2.pmins.d
define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pminsw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsw:
@@ -3967,8 +3967,8 @@ declare <16 x i16> @llvm.x86.avx2.pmins.
define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pminub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminub:
@@ -4010,8 +4010,8 @@ declare <32 x i8> @llvm.x86.avx2.pminu.b
define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pminud:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminud:
@@ -4053,8 +4053,8 @@ declare <8 x i32> @llvm.x86.avx2.pminu.d
define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pminuw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminuw:
@@ -4139,7 +4139,7 @@ define <8 x i32> @test_pmovsxbd(<16 x i8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbd:
@@ -4190,7 +4190,7 @@ define <4 x i64> @test_pmovsxbq(<16 x i8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbq:
@@ -4241,7 +4241,7 @@ define <16 x i16> @test_pmovsxbw(<16 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbw:
@@ -4290,7 +4290,7 @@ define <4 x i64> @test_pmovsxdq(<4 x i32
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxdq:
@@ -4339,7 +4339,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxwd:
@@ -4388,7 +4388,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxwq:
@@ -4439,7 +4439,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbd:
@@ -4490,7 +4490,7 @@ define <4 x i64> @test_pmovzxbq(<16 x i8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbq:
@@ -4541,7 +4541,7 @@ define <16 x i16> @test_pmovzxbw(<16 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbw:
@@ -4590,7 +4590,7 @@ define <4 x i64> @test_pmovzxdq(<4 x i32
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxdq:
@@ -4639,7 +4639,7 @@ define <8 x i32> @test_pmovzxwd(<8 x i16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxwd:
@@ -4688,7 +4688,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxwq:
@@ -5040,7 +5040,7 @@ define <4 x i64> @test_por(<4 x i64> %a0
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_por:
@@ -5176,7 +5176,7 @@ define <8 x i32> @test_pshufd(<8 x i32>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufd:
@@ -5321,8 +5321,8 @@ define <16 x i16> @test_pshuflw(<16 x i1
define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psignb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignb:
@@ -5364,8 +5364,8 @@ declare <32 x i8> @llvm.x86.avx2.psign.b
define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psignd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignd:
@@ -5407,8 +5407,8 @@ declare <8 x i32> @llvm.x86.avx2.psign.d
define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psignw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignw:
@@ -6348,8 +6348,8 @@ declare <16 x i16> @llvm.x86.avx2.psrl.w
define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubb:
@@ -6390,8 +6390,8 @@ define <32 x i8> @test_psubb(<32 x i8> %
define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psubd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubd:
@@ -6432,8 +6432,8 @@ define <8 x i32> @test_psubd(<8 x i32> %
define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_psubq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubq:
@@ -6474,8 +6474,8 @@ define <4 x i64> @test_psubq(<4 x i64> %
define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubsb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubsb:
@@ -6517,8 +6517,8 @@ declare <32 x i8> @llvm.x86.avx2.psubs.b
define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubsw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubsw:
@@ -6560,8 +6560,8 @@ declare <16 x i16> @llvm.x86.avx2.psubs.
define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubusb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubusb:
@@ -6603,8 +6603,8 @@ declare <32 x i8> @llvm.x86.avx2.psubus.
define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubusw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubusw:
@@ -6646,8 +6646,8 @@ declare <16 x i16> @llvm.x86.avx2.psubus
define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubw:
@@ -6732,8 +6732,8 @@ define <8 x i32> @test_punpckhdq(<8 x i3
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckhdq:
@@ -6787,7 +6787,7 @@ define <4 x i64> @test_punpckhqdq(<4 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckhqdq:
@@ -6920,8 +6920,8 @@ define <8 x i32> @test_punpckldq(<8 x i3
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
+; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckldq:
@@ -6975,7 +6975,7 @@ define <4 x i64> @test_punpcklqdq(<4 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklqdq:
@@ -7066,7 +7066,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu May 3 06:27:10 2018
@@ -291,7 +291,7 @@ entry:
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpaddq_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_test:
@@ -305,7 +305,7 @@ define <8 x i64> @vpaddq_test(<8 x i64>
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; GENERIC-LABEL: vpaddq_fold_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_fold_test:
@@ -320,7 +320,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; GENERIC-LABEL: vpaddq_broadcast_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast_test:
@@ -334,7 +334,7 @@ define <8 x i64> @vpaddq_broadcast_test(
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; GENERIC-LABEL: vpaddq_broadcast2_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast2_test:
@@ -357,7 +357,7 @@ define <8 x i64> @vpaddq_broadcast2_test
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpaddd_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_test:
@@ -371,7 +371,7 @@ define <16 x i32> @vpaddd_test(<16 x i32
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; GENERIC-LABEL: vpaddd_fold_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_fold_test:
@@ -386,7 +386,7 @@ define <16 x i32> @vpaddd_fold_test(<16
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; GENERIC-LABEL: vpaddd_broadcast_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_broadcast_test:
@@ -401,7 +401,7 @@ define <16 x i32> @vpaddd_mask_test(<16
; GENERIC-LABEL: vpaddd_mask_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_test:
@@ -419,7 +419,7 @@ define <16 x i32> @vpaddd_maskz_test(<16
; GENERIC-LABEL: vpaddd_maskz_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_test:
@@ -437,7 +437,7 @@ define <16 x i32> @vpaddd_mask_fold_test
; GENERIC-LABEL: vpaddd_mask_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_fold_test:
@@ -456,7 +456,7 @@ define <16 x i32> @vpaddd_mask_broadcast
; GENERIC-LABEL: vpaddd_mask_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_broadcast_test:
@@ -474,7 +474,7 @@ define <16 x i32> @vpaddd_maskz_fold_tes
; GENERIC-LABEL: vpaddd_maskz_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_fold_test:
@@ -493,7 +493,7 @@ define <16 x i32> @vpaddd_maskz_broadcas
; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_broadcast_test:
@@ -510,7 +510,7 @@ define <16 x i32> @vpaddd_maskz_broadcas
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpsubq_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubq_test:
@@ -524,7 +524,7 @@ define <8 x i64> @vpsubq_test(<8 x i64>
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpsubd_test:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubd_test:
@@ -643,7 +643,7 @@ define <16 x float> @fadd_broadcast(<16
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: addq_broadcast:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addq_broadcast:
@@ -2828,7 +2828,7 @@ define <8 x float> @ubto8f32(<8 x i32> %
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2847,7 +2847,7 @@ define <8 x double> @ubto8f64(<8 x i32>
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -4369,8 +4369,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8i1_8i32:
@@ -4420,7 +4420,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -4439,7 +4439,7 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_16i1_16i32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4456,7 +4456,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4640,7 +4640,7 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; GENERIC-LABEL: zext_64xi1_to_64xi8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4657,7 +4657,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<6
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -4676,7 +4676,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; GENERIC-LABEL: zext_16xi1_to_16xi16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4694,7 +4694,7 @@ define <16 x i16> @zext_16xi1_to_16xi16(
define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5028,7 +5028,7 @@ define <16 x float> @test_x86_fmadd213_p
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5048,7 +5048,7 @@ entry:
define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5070,7 +5070,7 @@ entry:
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpord:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5090,7 +5090,7 @@ entry:
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxord:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5110,7 +5110,7 @@ entry:
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandq:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5129,7 +5129,7 @@ entry:
define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnq:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5149,7 +5149,7 @@ entry:
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vporq:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5168,7 +5168,7 @@ entry:
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxorq:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6867,7 +6867,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
@@ -6891,7 +6891,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test2:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
@@ -6917,7 +6917,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
@@ -6967,8 +6967,8 @@ entry:
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; GENERIC-LABEL: test4:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -6990,8 +6990,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; GENERIC-LABEL: vcmp_test5:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7059,7 +7059,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: .LBB386_1:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
Modified: llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-schedule.ll Thu May 3 06:27:10 2018
@@ -8,15 +8,15 @@ define void @test_vpopcntd(<16 x i32> %a
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -28,12 +28,12 @@ define void @test_vpopcntd(<16 x i32> %a
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [6:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
@@ -46,15 +46,15 @@ define void @test_vpopcntq(<8 x i64> %a0
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
+; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
+; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -66,12 +66,12 @@ define void @test_vpopcntq(<8 x i64> %a0
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [6:0.50]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [6:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
+; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
Modified: llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll Thu May 3 06:27:10 2018
@@ -6,7 +6,7 @@
define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; GENERIC-LABEL: test_extrq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrq:
@@ -26,7 +26,7 @@ declare <2 x i64> @llvm.x86.sse4a.extrq(
define <2 x i64> @test_extrqi(<2 x i64> %a0) {
; GENERIC-LABEL: test_extrqi:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrqi:
@@ -46,7 +46,7 @@ declare <2 x i64> @llvm.x86.sse4a.extrqi
define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertq:
@@ -66,7 +66,7 @@ declare <2 x i64> @llvm.x86.sse4a.insert
define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertqi:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertqi:
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331453&r1=331452&r2=331453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Thu May 3 06:27:10 2018
@@ -147,14 +147,14 @@ define void @test_vpcom(<2 x i64> %a0, <
; GENERIC-LABEL: test_vpcom:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -179,14 +179,14 @@ define void @test_vpcomu(<2 x i64> %a0,
; GENERIC-LABEL: test_vpcomu:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list