[llvm] r329806 - [X86] Add variable shuffle schedule classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 11 06:49:19 PDT 2018
Author: rksimon
Date: Wed Apr 11 06:49:19 2018
New Revision: 329806
URL: http://llvm.org/viewvc/llvm-project?rev=329806&view=rev
Log:
[X86] Add variable shuffle schedule classes
Split variable index shuffles from immediate index shuffles
WriteFVarShuffle - variable 'in-lane' shuffles (VPERMILPS/VPERMIL2PS etc.)
WriteVarShuffle - variable 'in-lane' shuffles (PSHUFB/VPPERM etc.)
WriteFVarShuffle256 - variable 'cross-lane' shuffles (VPERMPS etc.)
WriteVarShuffle256 - variable 'cross-lane' shuffles (VPERMD etc.)
Differential Revision: https://reviews.llvm.org/D45404
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Apr 11 06:49:19 2018
@@ -1725,12 +1725,12 @@ defm VPBROADCASTMB2Q : avx512_mask_broad
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
-let Sched = WriteFShuffle256 in
+let Sched = WriteFVarShuffle256 in
def AVX512_PERM2_F : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
-let Sched = WriteShuffle256 in
+let Sched = WriteVarShuffle256 in
def AVX512_PERM2_I : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
@@ -8969,7 +8969,7 @@ let Predicates = [HasDQI, NoBWI] in {
//
// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
-let Sched = WriteShuffle256 in {
+let Sched = WriteVarShuffle256 in {
def AVX512_COMPRESS : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Wed Apr 11 06:49:19 2018
@@ -74,11 +74,12 @@ def MMX_UNPCK_L_ITINS : OpndItins<
def MMX_PCK_ITINS : OpndItins<
IIC_MMX_PCK_RR, IIC_MMX_PCK_RM
>;
+} // Sched
+let Sched = WriteVarShuffle in
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
-} // Sched
let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Apr 11 06:49:19 2018
@@ -5050,7 +5050,7 @@ def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
}
-let Sched = WriteShuffle in
+let Sched = WriteVarShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
@@ -7688,7 +7688,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0
// VPERMIL - Permute Single and Double Floating-Point Values
//
-let Sched = WriteFShuffle in
+let Sched = WriteFVarShuffle in
def AVX_VPERMILV : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
@@ -7707,13 +7707,13 @@ multiclass avx_permil<bits<8> opc_rm, bi
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
- Sched<[WriteFShuffle]>;
+ Sched<[WriteFVarShuffle]>;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
(i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[WriteFShuffleLd, ReadAfterLd]>;
+ Sched<[WriteFVarShuffleLd, ReadAfterLd]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
@@ -8181,10 +8181,10 @@ multiclass avx2_perm<bits<8> opc, string
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256,
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256,
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Wed Apr 11 06:49:19 2018
@@ -279,7 +279,7 @@ multiclass xop4op<bits<8> opc, string Op
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[WriteShuffle]>;
+ XOP_4V, Sched<[WriteVarShuffle]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
@@ -287,7 +287,7 @@ multiclass xop4op<bits<8> opc, string Op
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[WriteVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -295,7 +295,7 @@ multiclass xop4op<bits<8> opc, string Op
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd,
+ XOP_4V, Sched<[WriteVarShuffleLd, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
@@ -307,7 +307,7 @@ multiclass xop4op<bits<8> opc, string Op
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, Sched<[WriteShuffle]>, FoldGenData<NAME#rrr>;
+ []>, XOP_4V, VEX_W, Sched<[WriteVarShuffle]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
@@ -367,7 +367,7 @@ multiclass xop_vpermil2<bits<8> Opc, str
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
- Sched<[WriteFShuffle]>;
+ Sched<[WriteFVarShuffle]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -376,7 +376,7 @@ multiclass xop_vpermil2<bits<8> Opc, str
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[WriteFShuffleLd, ReadAfterLd, ReadAfterLd]>;
+ Sched<[WriteFVarShuffleLd, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -384,7 +384,7 @@ multiclass xop_vpermil2<bits<8> Opc, str
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[WriteFShuffleLd, ReadAfterLd,
+ Sched<[WriteFVarShuffleLd, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
@@ -395,7 +395,7 @@ multiclass xop_vpermil2<bits<8> Opc, str
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, Sched<[WriteFShuffle]>, FoldGenData<NAME#rr>;
+ []>, VEX_W, Sched<[WriteFVarShuffle]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedDouble in {
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Wed Apr 11 06:49:19 2018
@@ -162,6 +162,7 @@ defm : BWWriteResPair<WriteFRcp, [BWPo
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate.
defm : BWWriteResPair<WriteFMA, [BWPort01], 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2]>; // Fp vector variable blends.
@@ -178,6 +179,7 @@ defm : BWWriteResPair<WriteVecShift, [BW
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort15], 1>; // Vector blends.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2]>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -288,7 +290,9 @@ def : WriteRes<WriteSystem, [BWPort0
// AVX2.
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -366,7 +370,6 @@ def: InstRW<[BWWriteResGroup3], (instreg
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -404,9 +407,7 @@ def: InstRW<[BWWriteResGroup3], (instreg
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -419,7 +420,6 @@ def: InstRW<[BWWriteResGroup3], (instreg
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -891,9 +891,7 @@ def: InstRW<[BWWriteResGroup28], (instre
"VPBROADCASTW(Y?)rr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Wed Apr 11 06:49:19 2018
@@ -159,8 +159,10 @@ defm : HWWriteResPair<WriteCvtI2F, [HWPo
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2]>;
// Vector integer operations.
@@ -174,8 +176,10 @@ defm : HWWriteResPair<WriteVecALU, [HW
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteBlend, [HWPort15], 1>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2]>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 6, [1, 2]>;
@@ -724,7 +728,6 @@ def: InstRW<[HWWriteResGroup4], (instreg
"MMX_MOVD64to64rr",
"MMX_MOVQ2DQrr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -762,9 +765,7 @@ def: InstRW<[HWWriteResGroup4], (instreg
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -777,7 +778,6 @@ def: InstRW<[HWWriteResGroup4], (instreg
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -1780,9 +1780,7 @@ def: InstRW<[HWWriteResGroup51], (instre
"VPBROADCASTWrr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Wed Apr 11 06:49:19 2018
@@ -148,6 +148,7 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPo
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort0, SBPort5], 2>;
@@ -162,6 +163,7 @@ defm : SBWriteResPair<WriteVecALU, [SB
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1>;
defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 5, [1,2], 3>;
@@ -275,7 +277,9 @@ def : WriteRes<WriteNop, []>;
// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
+defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
+defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
@@ -352,9 +356,7 @@ def: InstRW<[SBWriteResGroup2], (instreg
"(V?)ORPS(Y?)rr",
"VPERM2F128rr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)SHUFPD(Y?)rri",
"(V?)SHUFPS(Y?)rri",
"(V?)UNPCKHPD(Y?)rr",
@@ -408,7 +410,6 @@ def: InstRW<[SBWriteResGroup5], (instreg
"MMX_PABSWrr",
"MMX_PADDQirr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSIGNBrr",
"MMX_PSIGNDrr",
"MMX_PSIGNWrr",
@@ -462,7 +463,6 @@ def: InstRW<[SBWriteResGroup5], (instreg
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFBrr",
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Wed Apr 11 06:49:19 2018
@@ -159,6 +159,7 @@ defm : SKLWriteResPair<WriteFRcp, [SKL
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 5>; // Floating point reciprocal square root estimate.
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4>; // Fused Multiply Add.
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector variable shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort5], 2, [2]>; // Fp vector variable blends.
@@ -175,6 +176,7 @@ defm : SKLWriteResPair<WriteVecShift, [S
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector variable shuffles.
defm : SKLWriteResPair<WriteBlend, [SKLPort15], 1>; // Vector blends.
defm : SKLWriteResPair<WriteVarBlend, [SKLPort5], 2, [2]>; // Vector variable blends.
defm : SKLWriteResPair<WriteMPSAD, [SKLPort0, SKLPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKLPort
// AVX2.
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -367,7 +371,6 @@ def: InstRW<[SKLWriteResGroup3], (instre
"MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -397,9 +400,7 @@ def: InstRW<[SKLWriteResGroup3], (instre
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPD(Y?)ri",
- "VPERMILPD(Y?)rr",
"VPERMILPS(Y?)ri",
- "VPERMILPS(Y?)rr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -412,7 +413,6 @@ def: InstRW<[SKLWriteResGroup3], (instre
"(V?)PMOVZXDQrr",
"(V?)PMOVZXWDrr",
"(V?)PMOVZXWQrr",
- "(V?)PSHUFB(Y?)rr",
"(V?)PSHUFD(Y?)ri",
"(V?)PSHUFHW(Y?)ri",
"(V?)PSHUFLW(Y?)ri",
@@ -884,9 +884,7 @@ def: InstRW<[SKLWriteResGroup30], (instr
"(V?)PCMPGTQ(Y?)rr",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
"VPERMPDYri",
- "VPERMPSYrr",
"VPERMQYri",
"VPMOVSXBDYrr",
"VPMOVSXBQYrr",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Wed Apr 11 06:49:19 2018
@@ -159,6 +159,7 @@ defm : SKXWriteResPair<WriteFRcp, [SKX
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort5], 2, [2]>; // Fp vector variable blends.
@@ -175,6 +176,7 @@ defm : SKXWriteResPair<WriteVecShift, [S
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteBlend, [SKXPort15], 1>; // Vector blends.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort5], 2, [2]>; // Vector variable blends.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort0, SKXPort5], 6, [1, 2]>; // Vector MPSAD.
@@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKXPort
// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -412,7 +416,6 @@ def: InstRW<[SKXWriteResGroup3], (instre
"MMX_MOVD64rr",
"MMX_MOVD64to64rr",
"MMX_PALIGNRrri",
- "MMX_PSHUFBrr",
"MMX_PSHUFWri",
"MMX_PUNPCKHBWirr",
"MMX_PUNPCKHDQirr",
@@ -447,7 +450,6 @@ def: InstRW<[SKXWriteResGroup3], (instre
"PMOVZXDQrr",
"PMOVZXWDrr",
"PMOVZXWQrr",
- "PSHUFBrr",
"PSHUFDri",
"PSHUFHWri",
"PSHUFLWri",
@@ -530,25 +532,15 @@ def: InstRW<[SKXWriteResGroup3], (instre
"VPBROADCASTDrr",
"VPBROADCASTQrr",
"VPERMILPDYri",
- "VPERMILPDYrr",
"VPERMILPDZ128ri",
- "VPERMILPDZ128rr",
"VPERMILPDZ256ri",
- "VPERMILPDZ256rr",
"VPERMILPDZri",
- "VPERMILPDZrr",
"VPERMILPDri",
- "VPERMILPDrr",
"VPERMILPSYri",
- "VPERMILPSYrr",
"VPERMILPSZ128ri",
- "VPERMILPSZ128rr",
"VPERMILPSZ256ri",
- "VPERMILPSZ256rr",
"VPERMILPSZri",
- "VPERMILPSZrr",
"VPERMILPSri",
- "VPERMILPSrr",
"VPMOVSXBDrr",
"VPMOVSXBQrr",
"VPMOVSXBWrr",
@@ -561,11 +553,6 @@ def: InstRW<[SKXWriteResGroup3], (instre
"VPMOVZXDQrr",
"VPMOVZXWDrr",
"VPMOVZXWQrr",
- "VPSHUFBYrr",
- "VPSHUFBZ128rr",
- "VPSHUFBZ256rr",
- "VPSHUFBZrr",
- "VPSHUFBrr",
"VPSHUFDYri",
"VPSHUFDZ128ri",
"VPSHUFDZ256ri",
@@ -1859,46 +1846,12 @@ def: InstRW<[SKXWriteResGroup32], (instr
"VPCMPWZrri",
"VPERM2F128rr",
"VPERM2I128rr",
- "VPERMDYrr",
- "VPERMDZ256rr",
- "VPERMDZrr",
- "VPERMI2D128rr",
- "VPERMI2D256rr",
- "VPERMI2Drr",
- "VPERMI2PD128rr",
- "VPERMI2PD256rr",
- "VPERMI2PDrr",
- "VPERMI2PS128rr",
- "VPERMI2PS256rr",
- "VPERMI2PSrr",
- "VPERMI2Q128rr",
- "VPERMI2Q256rr",
- "VPERMI2Qrr",
"VPERMPDYri",
"VPERMPDZ256ri",
- "VPERMPDZ256rr",
"VPERMPDZri",
- "VPERMPDZrr",
- "VPERMPSYrr",
- "VPERMPSZ256rr",
- "VPERMPSZrr",
"VPERMQYri",
"VPERMQZ256ri",
- "VPERMQZ256rr",
"VPERMQZri",
- "VPERMQZrr",
- "VPERMT2D128rr",
- "VPERMT2D256rr",
- "VPERMT2Drr",
- "VPERMT2PD128rr",
- "VPERMT2PD256rr",
- "VPERMT2PDrr",
- "VPERMT2PS128rr",
- "VPERMT2PS256rr",
- "VPERMT2PSrr",
- "VPERMT2Q128rr",
- "VPERMT2Q256rr",
- "VPERMT2Qrr",
"VPMAXSQZ128rr",
"VPMAXSQZ256rr",
"VPMAXSQZrr",
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Wed Apr 11 06:49:19 2018
@@ -87,6 +87,7 @@ defm WriteFRcp : X86SchedWritePair; //
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
+defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
@@ -106,6 +107,7 @@ defm WriteVecShift : X86SchedWritePair;
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
defm WritePMULLD : X86SchedWritePair; // PMULLD
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
+defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
@@ -150,7 +152,9 @@ def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width vector variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
// Old microcoded instructions that nobody use.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Wed Apr 11 06:49:19 2018
@@ -301,9 +301,11 @@ defm : JWriteResFpuPair<WriteFRsqrt,
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
// Conversions.
@@ -367,10 +369,12 @@ defm : JWriteResFpuPair<WriteVecIMul,
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
@@ -750,34 +754,6 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAG
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
-def JWritePSHUFB: SchedWriteRes<[JFPU01, JVALU]> {
- let Latency = 2;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWritePSHUFB], (instrs MMX_PSHUFBrr, PSHUFBrr, VPSHUFBrr)>;
-
-def JWritePSHUFBLd: SchedWriteRes<[JLAGU, JFPU01, JVALU]> {
- let Latency = 7;
- let ResourceCycles = [1, 1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWritePSHUFBLd, ReadAfterLd], (instrs MMX_PSHUFBrm, PSHUFBrm, VPSHUFBrm)>;
-
-def JWriteVPERM: SchedWriteRes<[JFPU01, JFPX]> {
- let Latency = 2;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteVPERM], (instrs VPERMILPDrr, VPERMILPSrr)>;
-
-def JWriteVPERMLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 7;
- let ResourceCycles = [1, 1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteVPERMLd, ReadAfterLd], (instrs VPERMILPDrm, VPERMILPSrm)>;
-
def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> {
let Latency = 3;
let ResourceCycles = [2, 6];
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Wed Apr 11 06:49:19 2018
@@ -134,6 +134,7 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Vector integer operations.
@@ -149,6 +150,7 @@ defm : SLMWriteResPair<WriteVecIMul, [S
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
@@ -255,7 +257,9 @@ def : WriteRes<WriteIMulH, [SLM_FPC_RSV
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarShuffle256, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Wed Apr 11 06:49:19 2018
@@ -201,6 +201,7 @@ defm : ZnWriteResFpuPair<WriteCvtF2F,
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
@@ -219,8 +220,10 @@ defm : ZnWriteResFpuPair<WriteVecALU,
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
@@ -241,6 +244,7 @@ def : WriteRes<WriteNop, []>;
// Following instructions with latency=100 are microcoded.
// We set long latency so as to block the entire pipeline.
defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;
//Microcoded Instructions
let Latency = 100 in {
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Apr 11 06:49:19 2018
@@ -5131,8 +5131,8 @@ declare <4 x i64> @llvm.x86.avx2.psad.bw
define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pshufb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufb:
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Apr 11 06:49:19 2018
@@ -7589,7 +7589,7 @@ define <32 x i16> @test_build_vec_v32i1(
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; GENERIC-LABEL: test_build_vec_v64i1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_build_vec_v64i1:
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Wed Apr 11 06:49:19 2018
@@ -4535,7 +4535,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC-LABEL: test_masked_16xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4555,7 +4555,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mask0:
@@ -4572,7 +4572,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC-LABEL: test_masked_16xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4592,7 +4592,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mask1:
@@ -4609,7 +4609,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC-LABEL: test_masked_16xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4629,7 +4629,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mask2:
@@ -4659,7 +4659,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC-LABEL: test_masked_16xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4679,7 +4679,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mask3:
@@ -4713,7 +4713,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mem_mask0:
@@ -4734,7 +4734,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0:
@@ -4755,7 +4755,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mem_mask1:
@@ -4776,7 +4776,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1:
@@ -4797,7 +4797,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mem_mask2:
@@ -4818,7 +4818,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2:
@@ -4855,7 +4855,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mem_mask3:
@@ -4876,7 +4876,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3:
@@ -4895,7 +4895,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) {
; GENERIC-LABEL: test_32xi8_perm_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi8_perm_mask0:
@@ -4909,7 +4909,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC-LABEL: test_masked_32xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4929,7 +4929,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mask0:
@@ -4946,7 +4946,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC-LABEL: test_masked_32xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4966,7 +4966,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mask1:
@@ -4983,7 +4983,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC-LABEL: test_masked_32xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5003,7 +5003,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mask2:
@@ -5019,7 +5019,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) {
; GENERIC-LABEL: test_32xi8_perm_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi8_perm_mask3:
@@ -5033,7 +5033,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC-LABEL: test_masked_32xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5053,7 +5053,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mask3:
@@ -5070,7 +5070,7 @@ define <32 x i8> @test_32xi8_perm_mem_ma
; GENERIC-LABEL: test_32xi8_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi8_perm_mem_mask0:
@@ -5087,7 +5087,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi8_perm_mem_mask0:
@@ -5108,7 +5108,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0:
@@ -5129,7 +5129,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi8_perm_mem_mask1:
@@ -5150,7 +5150,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1:
@@ -5171,7 +5171,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi8_perm_mem_mask2:
@@ -5192,7 +5192,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2:
@@ -5212,7 +5212,7 @@ define <32 x i8> @test_32xi8_perm_mem_ma
; GENERIC-LABEL: test_32xi8_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi8_perm_mem_mask3:
@@ -5229,7 +5229,7 @@ define <32 x i8> @test_masked_32xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi8_perm_mem_mask3:
@@ -5250,7 +5250,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3:
@@ -5269,7 +5269,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) {
; GENERIC-LABEL: test_64xi8_perm_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_64xi8_perm_mask0:
@@ -5283,7 +5283,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC-LABEL: test_masked_64xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5303,7 +5303,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mask0:
@@ -5320,7 +5320,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC-LABEL: test_masked_64xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5340,7 +5340,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mask1:
@@ -5357,7 +5357,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC-LABEL: test_masked_64xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5377,7 +5377,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mask2:
@@ -5393,7 +5393,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) {
; GENERIC-LABEL: test_64xi8_perm_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_64xi8_perm_mask3:
@@ -5407,7 +5407,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC-LABEL: test_masked_64xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5427,7 +5427,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mask3:
@@ -5444,7 +5444,7 @@ define <64 x i8> @test_64xi8_perm_mem_ma
; GENERIC-LABEL: test_64xi8_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_64xi8_perm_mem_mask0:
@@ -5461,7 +5461,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_64xi8_perm_mem_mask0:
@@ -5482,7 +5482,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0:
@@ -5503,7 +5503,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_64xi8_perm_mem_mask1:
@@ -5524,7 +5524,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1:
@@ -5545,7 +5545,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_64xi8_perm_mem_mask2:
@@ -5566,7 +5566,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2:
@@ -5586,7 +5586,7 @@ define <64 x i8> @test_64xi8_perm_mem_ma
; GENERIC-LABEL: test_64xi8_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_64xi8_perm_mem_mask3:
@@ -5603,7 +5603,7 @@ define <64 x i8> @test_masked_64xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_64xi8_perm_mem_mask3:
@@ -5624,7 +5624,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:1.00]
+; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3:
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=329806&r1=329805&r2=329806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Wed Apr 11 06:49:19 2018
@@ -843,9 +843,9 @@ define void @test_vpperm(<2 x i64> %a0,
; GENERIC-LABEL: test_vpperm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list