[llvm] r331380 - [X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with more common default values
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 2 10:58:50 PDT 2018
Author: rksimon
Date: Wed May 2 10:58:50 2018
New Revision: 331380
URL: http://llvm.org/viewvc/llvm-project?rev=331380&view=rev
Log:
[X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with more common default values
Modified:
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Wed May 2 10:58:50 2018
@@ -205,8 +205,8 @@ defm : BWWriteResPair<WriteVecLogicY,[BW
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
-defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
-defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
+defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort5], 1>; // Vector blends.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
@@ -334,11 +334,11 @@ defm : BWWriteResPair<WriteCLMul, [BWPo
def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
-defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
-defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
-defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
+defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
+defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
+defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Wed May 2 10:58:50 2018
@@ -172,14 +172,14 @@ defm : HWWriteResPair<WriteFMAY, [HWPor
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
-defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
@@ -200,11 +200,11 @@ defm : HWWriteResPair<WriteVecLogicY,[HW
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
-defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
-defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVarShuffle,[HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
-defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
-defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
+defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>;
+defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
@@ -221,6 +221,7 @@ def : WriteRes<WriteVecInsertLd, [HWPort
let Latency = 6;
let NumMicroOps = 2;
}
+def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
let Latency = 2;
@@ -874,14 +875,11 @@ def HWWriteResGroup13 : SchedWriteRes<[H
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm",
- "(V?)PACKSSDWrm",
+def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
"(V?)PALIGNRrmi",
- "VPERMILPDmi",
- "VPERMILPSmi",
"(V?)PSHUFBrm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
@@ -893,13 +891,7 @@ def: InstRW<[HWWriteResGroup13], (instre
"(V?)PUNPCKLBWrm",
"(V?)PUNPCKLDQrm",
"(V?)PUNPCKLQDQrm",
- "(V?)PUNPCKLWDrm",
- "(V?)SHUFPDrmi",
- "(V?)SHUFPSrmi",
- "(V?)UNPCKHPDrm",
- "(V?)UNPCKHPSrm",
- "(V?)UNPCKLPDrm",
- "(V?)UNPCKLPSrm")>;
+ "(V?)PUNPCKLWDrm")>;
def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 8;
@@ -1415,13 +1407,7 @@ def HWWriteResGroup53 : SchedWriteRes<[H
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup53], (instregex "VPERM2F128rm",
- "VPERM2I128rm",
- "VPERMDYrm",
- "VPERMPDYmi",
- "VPERMPSYrm",
- "VPERMQYmi",
- "VPMOVZXBDYrm",
+def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
@@ -1798,8 +1784,8 @@ def HWWriteResGroup89 : SchedWriteRes<[H
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
- "MUL_FPrST0",
- "MUL_FST0r",
+ "MUL_FPrST0",
+ "MUL_FST0r",
"MUL_FrST0")>;
def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Wed May 2 10:58:50 2018
@@ -159,10 +159,10 @@ defm : SBWriteResPair<WriteCvtF2F, [SBPo
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
-defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
+defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
@@ -180,8 +180,8 @@ defm : SBWriteResPair<WriteVecLogicY,[SB
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
-defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
-defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
+defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
@@ -322,10 +322,10 @@ def : WriteRes<WriteNop, []>;
// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
-defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
-defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
-defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
-defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
+defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
+defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAS, [SBPort01], 5>;
@@ -372,9 +372,6 @@ def: InstRW<[SBWriteResGroup2], (instreg
"RETQ",
"ST_FPrr",
"ST_Frr",
- "VEXTRACTF128rr",
- "VINSERTF128rr",
- "VPERM2F128rr",
"(V?)MOV64toPQIrr",
"(V?)MOVDI2PDIrr")>;
@@ -936,28 +933,6 @@ def: InstRW<[SBWriteResGroup55], (instre
"VTESTPDrm",
"VTESTPSrm")>;
-def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128",
- "(V?)INSERTPSrm",
- "(V?)MOVHPDrm",
- "(V?)MOVHPSrm",
- "(V?)MOVLPDrm",
- "(V?)MOVLPSrm",
- "VPERMILPDmi",
- "VPERMILPDrm",
- "VPERMILPSmi",
- "VPERMILPSrm",
- "(V?)SHUFPDrmi",
- "(V?)SHUFPSrmi",
- "(V?)UNPCKHPDrm",
- "(V?)UNPCKHPSrm",
- "(V?)UNPCKLPDrm",
- "(V?)UNPCKLPSrm")>;
-
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -1135,15 +1110,6 @@ def SBWriteResGroup72 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
-def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
- "VPERMILPDYrm",
- "VPERMILPSYrm")>;
-
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Wed May 2 10:58:50 2018
@@ -172,10 +172,10 @@ defm : SKLWriteResPair<WriteFMAY, [SKL
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
-defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
-defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
@@ -201,8 +201,8 @@ defm : SKLWriteResPair<WriteVecLogicY,[S
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
-defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
-defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector shuffles.
+defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
@@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKLPor
let Latency = 6;
let NumMicroOps = 2;
}
+def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
let Latency = 3;
@@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKLPort5,
def : WriteRes<WriteSystem, [SKLPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
-defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
-defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles.
-defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles.
-defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles.
+defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
+defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -1260,18 +1261,13 @@ def SKLWriteResGroup88 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm",
- "(V?)PACKSSDWrm",
+def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
"(V?)PALIGNRrmi",
"VPBROADCASTBrm",
"VPBROADCASTWrm",
- "VPERMILPDmi",
- "VPERMILPDrm",
- "VPERMILPSmi",
- "VPERMILPSrm",
"(V?)PSHUFBrm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
@@ -1283,13 +1279,7 @@ def: InstRW<[SKLWriteResGroup88], (instr
"(V?)PUNPCKLBWrm",
"(V?)PUNPCKLDQrm",
"(V?)PUNPCKLQDQrm",
- "(V?)PUNPCKLWDrm",
- "(V?)SHUFPDrmi",
- "(V?)SHUFPSrmi",
- "(V?)UNPCKHPDrm",
- "(V?)UNPCKHPSrm",
- "(V?)UNPCKLPDrm",
- "(V?)UNPCKLPSrm")>;
+ "(V?)PUNPCKLWDrm")>;
def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 7;
@@ -1514,8 +1504,6 @@ def: InstRW<[SKLWriteResGroup108], (inst
"VPBLENDWYrmi",
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
- "VPERMILPDYrm",
- "VPERMILPSYrm",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@@ -1791,12 +1779,6 @@ def SKLWriteResGroup133 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VPCMPGTQYrm",
- "VPERM2F128rm",
- "VPERM2I128rm",
- "VPERMDYrm",
- "VPERMPDYmi",
- "VPERMPSYrm",
- "VPERMQYmi",
"VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Wed May 2 10:58:50 2018
@@ -172,10 +172,10 @@ defm : SKXWriteResPair<WriteFMAY, [SKXPo
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
-defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
-defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
@@ -201,8 +201,8 @@ defm : SKXWriteResPair<WriteVecLogicY,[S
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
-defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
-defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
+defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
@@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKXPor
let Latency = 6;
let NumMicroOps = 2;
}
+def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
let Latency = 3;
@@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKXPort5,
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
-defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles.
-defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles.
-defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles.
-defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles.
+defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
+defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@@ -1284,57 +1285,18 @@ def: InstRW<[SKXWriteResGroup32], (instr
"VPMINUQZ128rr",
"VPMINUQZ256rr",
"VPMINUQZrr",
- "VPMOVQDZ128rr",
- "VPMOVQDZ256rr",
- "VPMOVQDZrr",
"VPMOVSXBDYrr",
- "VPMOVSXBDZ128rr",
- "VPMOVSXBDZ256rr",
- "VPMOVSXBDZrr",
"VPMOVSXBQYrr",
- "VPMOVSXBQZ128rr",
- "VPMOVSXBQZ256rr",
- "VPMOVSXBQZrr",
"VPMOVSXBWYrr",
- "VPMOVSXBWZ128rr",
- "VPMOVSXBWZ256rr",
- "VPMOVSXBWZrr",
"VPMOVSXDQYrr",
- "VPMOVSXDQZ128rr",
- "VPMOVSXDQZ256rr",
- "VPMOVSXDQZrr",
"VPMOVSXWDYrr",
- "VPMOVSXWDZ128rr",
- "VPMOVSXWDZ256rr",
- "VPMOVSXWDZrr",
"VPMOVSXWQYrr",
- "VPMOVSXWQZ128rr",
- "VPMOVSXWQZ256rr",
- "VPMOVSXWQZrr",
"VPMOVZXBDYrr",
- "VPMOVZXBDZ128rr",
- "VPMOVZXBDZ256rr",
- "VPMOVZXBDZrr",
"VPMOVZXBQYrr",
- "VPMOVZXBQZ128rr",
- "VPMOVZXBQZ256rr",
- "VPMOVZXBQZrr",
"VPMOVZXBWYrr",
- "VPMOVZXBWZ128rr",
- "VPMOVZXBWZ256rr",
- "VPMOVZXBWZrr",
"VPMOVZXDQYrr",
- "VPMOVZXDQZ128rr",
- "VPMOVZXDQZ256rr",
- "VPMOVZXDQZrr",
"VPMOVZXWDYrr",
- "VPMOVZXWDZ128rr",
- "VPMOVZXWDZ256rr",
- "VPMOVZXWDZrr",
"VPMOVZXWQYrr",
- "VPMOVZXWQZ128rr",
- "VPMOVZXWQZ256rr",
- "VPMOVZXWQZrr",
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTESTMBZ128rr",
"VPTESTMBZ256rr",
@@ -2189,9 +2151,7 @@ def SKXWriteResGroup92 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
- "(V?)INSERTPSrm",
- "VMOVSDZrm(b?)",
+def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
"VMOVSSZrm(b?)",
"VPACKSSDWZ128rm(b?)",
"(V?)PACKSSDWrm",
@@ -2207,14 +2167,6 @@ def: InstRW<[SKXWriteResGroup92], (instr
"VPBROADCASTBrm",
"VPBROADCASTWZ128m(b?)",
"VPBROADCASTWrm",
- "VPERMILPDZ128m(b?)i",
- "VPERMILPDZ128rm(b?)",
- "VPERMILPDmi",
- "VPERMILPDrm",
- "VPERMILPSZ128m(b?)i",
- "VPERMILPSZ128rm(b?)",
- "VPERMILPSmi",
- "VPERMILPSrm",
"VPSHUFBZ128rm(b?)",
"(V?)PSHUFBrm",
"VPSHUFDZ128m(b?)i",
@@ -2240,19 +2192,7 @@ def: InstRW<[SKXWriteResGroup92], (instr
"VPUNPCKLQDQZ128rm(b?)",
"(V?)PUNPCKLQDQrm",
"VPUNPCKLWDZ128rm(b?)",
- "(V?)PUNPCKLWDrm",
- "VSHUFPDZ128rm(b?)i",
- "(V?)SHUFPDrmi",
- "VSHUFPSZ128rm(b?)i",
- "(V?)SHUFPSrmi",
- "VUNPCKHPDZ128rm(b?)",
- "(V?)UNPCKHPDrm",
- "VUNPCKHPSZ128rm(b?)",
- "(V?)UNPCKHPSrm",
- "VUNPCKLPDZ128rm(b?)",
- "(V?)UNPCKLPDrm",
- "VUNPCKLPSZ128rm(b?)",
- "(V?)UNPCKLPSrm")>;
+ "(V?)PUNPCKLWDrm")>;
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -2711,12 +2651,6 @@ def: InstRW<[SKXWriteResGroup119], (inst
"VPBROADCASTWYrm",
"VPBROADCASTWZ256m(b?)",
"VPBROADCASTWZm(b?)",
- "VPERMILPDYrm",
- "VPERMILPDZ256rm(b?)",
- "VPERMILPDZrm(b?)",
- "VPERMILPSYrm",
- "VPERMILPSZ256rm(b?)",
- "VPERMILPSZrm(b?)",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@@ -3367,40 +3301,6 @@ def: InstRW<[SKXWriteResGroup148], (inst
"VPCMPUWZrmi(b?)",
"VPCMPWZ256rmi(b?)",
"VPCMPWZrmi(b?)",
- "VPERM2F128rm",
- "VPERM2I128rm",
- "VPERMDYrm",
- "VPERMDZ256rm(b?)",
- "VPERMDZrm(b?)",
- "VPERMI2D256rm(b?)",
- "VPERMI2Drm(b?)",
- "VPERMI2PD256rm(b?)",
- "VPERMI2PDrm(b?)",
- "VPERMI2PS256rm(b?)",
- "VPERMI2PSrm(b?)",
- "VPERMI2Q256rm(b?)",
- "VPERMI2Qrm(b?)",
- "VPERMPDYmi",
- "VPERMPDZ256m(b?)i",
- "VPERMPDZ256rm(b?)",
- "VPERMPDZm(b?)i",
- "VPERMPDZrm(b?)",
- "VPERMPSYrm",
- "VPERMPSZ256rm(b?)",
- "VPERMPSZrm(b?)",
- "VPERMQYmi",
- "VPERMQZ256m(b?)i",
- "VPERMQZ256rm(b?)",
- "VPERMQZm(b?)i",
- "VPERMQZrm(b?)",
- "VPERMT2D256rm(b?)",
- "VPERMT2Drm(b?)",
- "VPERMT2PD256rm(b?)",
- "VPERMT2PDrm(b?)",
- "VPERMT2PS256rm(b?)",
- "VPERMT2PSrm(b?)",
- "VPERMT2Q256rm(b?)",
- "VPERMT2Qrm(b?)",
"VPMAXSQZ256rm(b?)",
"VPMAXSQZrm(b?)",
"VPMAXUQZ256rm(b?)",
@@ -3409,35 +3309,11 @@ def: InstRW<[SKXWriteResGroup148], (inst
"VPMINSQZrm(b?)",
"VPMINUQZ256rm(b?)",
"VPMINUQZrm(b?)",
- "VPMOVSXBDZ256rm(b?)",
- "VPMOVSXBDZrm(b?)",
- "VPMOVSXBQZ256rm(b?)",
- "VPMOVSXBQZrm(b?)",
- "VPMOVSXBWZ256rm(b?)",
- "VPMOVSXBWZrm(b?)",
- "VPMOVSXDQZ256rm(b?)",
- "VPMOVSXDQZrm(b?)",
- "VPMOVSXWDZ256rm(b?)",
- "VPMOVSXWDZrm(b?)",
- "VPMOVSXWQZ256rm(b?)",
- "VPMOVSXWQZrm(b?)",
"VPMOVZXBDYrm",
- "VPMOVZXBDZ256rm(b?)",
- "VPMOVZXBDZrm(b?)",
"VPMOVZXBQYrm",
- "VPMOVZXBQZ256rm(b?)",
- "VPMOVZXBQZrm(b?)",
"VPMOVZXBWYrm",
- "VPMOVZXBWZ256rm(b?)",
- "VPMOVZXBWZrm(b?)",
"VPMOVZXDQYrm",
- "VPMOVZXDQZ256rm(b?)",
- "VPMOVZXDQZrm(b?)",
- "VPMOVZXWDZ256rm(b?)",
- "VPMOVZXWDZrm(b?)",
"VPMOVZXWQYrm",
- "VPMOVZXWQZ256rm(b?)",
- "VPMOVZXWQZrm(b?)",
"VPSADBWYrm",
"VPSADBWZ256rm(b?)",
"VPSADBWZrm(b?)",
@@ -3456,15 +3332,7 @@ def: InstRW<[SKXWriteResGroup148], (inst
"VPTESTNMQZ256rm(b?)",
"VPTESTNMQZrm(b?)",
"VPTESTNMWZ256rm(b?)",
- "VPTESTNMWZrm(b?)",
- "VSHUFF32X4Z256rm(b?)i",
- "VSHUFF32X4Zrm(b?)i",
- "VSHUFF64X2Z256rm(b?)i",
- "VSHUFF64X2Zrm(b?)i",
- "VSHUFI32X4Z256rm(b?)i",
- "VSHUFI32X4Zrm(b?)i",
- "VSHUFI64X2Z256rm(b?)i",
- "VSHUFI64X2Zrm(b?)i")>;
+ "VPTESTNMWZrm(b?)")>;
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 10;
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed May 2 10:58:50 2018
@@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i
; GENERIC-LABEL: test_inserti128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i6
; GENERIC-LABEL: test_perm2i128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %
; GENERIC-LABEL: test_permd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x do
; GENERIC-LABEL: test_permpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32
; GENERIC-LABEL: test_permps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %
; GENERIC-LABEL: test_permq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed May 2 10:58:50 2018
@@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x16:
@@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x16:
@@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8mem_to_16x16:
@@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8mem_to_16x16:
@@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x8mem_to_4x32:
@@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x32:
@@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x32:
@@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x32:
@@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8mem_to_16x32:
@@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8mem_to_16x32:
@@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x8mem_to_2x64:
@@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x8mem_to_2x64mask:
@@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x8mem_to_4x64:
@@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x64mask:
@@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x64:
@@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
@@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x8mem_to_8x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x64:
@@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x16mem_to_4x32:
@@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x32mask:
@@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16mem_to_8x32:
@@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
@@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x3
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x16mem_to_16x32:
@@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x3
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
@@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x3
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_16x16mem_to_16x32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32:
@@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x16mem_to_2x64:
@@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x16mem_to_2x64mask:
@@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x16mem_to_4x64:
@@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x64mask:
@@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16mem_to_8x64:
@@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
@@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x16mem_to_8x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64:
@@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x32mem_to_2x64:
@@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x32mem_to_2x64mask:
@@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x32mem_to_4x64:
@@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32mem_to_4x64mask:
@@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x32mem_to_8x64:
@@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
@@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x32mem_to_8x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64:
@@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
; GENERIC-LABEL: extload_v8i64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -8258,7 +8258,7 @@ define <16 x float> @_ss16xfloat_maskz
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; GENERIC-LABEL: _ss16xfloat_load:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_load:
@@ -8275,7 +8275,7 @@ define <16 x float> @_ss16xfloat_mask_
; GENERIC-LABEL: _ss16xfloat_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_mask_load:
@@ -8295,7 +8295,7 @@ define <16 x float> @_ss16xfloat_maskz
; GENERIC-LABEL: _ss16xfloat_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_maskz_load:
@@ -8369,7 +8369,7 @@ define <8 x double> @_sd8xdouble_maskz
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; GENERIC-LABEL: _sd8xdouble_load:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_load:
@@ -8386,7 +8386,7 @@ define <8 x double> @_sd8xdouble_mask_
; GENERIC-LABEL: _sd8xdouble_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_mask_load:
@@ -8406,7 +8406,7 @@ define <8 x double> @_sd8xdouble_maskz
; GENERIC-LABEL: _sd8xdouble_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_maskz_load:
@@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f32
-; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -8732,7 +8732,7 @@ define <8 x double> @broadcast_sd_spill(
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f64
-; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
+; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Wed May 2 10:58:50 2018
@@ -202,7 +202,7 @@ define <16 x i16> @test_16xi16_perm_mem_
; GENERIC-LABEL: test_16xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mem_mask0:
@@ -219,7 +219,7 @@ define <16 x i16> @test_masked_16xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask0:
@@ -240,7 +240,7 @@ define <16 x i16> @test_masked_z_16xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0:
@@ -261,7 +261,7 @@ define <16 x i16> @test_masked_16xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask1:
@@ -282,7 +282,7 @@ define <16 x i16> @test_masked_z_16xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1:
@@ -303,7 +303,7 @@ define <16 x i16> @test_masked_16xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask2:
@@ -324,7 +324,7 @@ define <16 x i16> @test_masked_z_16xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2:
@@ -344,7 +344,7 @@ define <16 x i16> @test_16xi16_perm_mem_
; GENERIC-LABEL: test_16xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi16_perm_mem_mask3:
@@ -361,7 +361,7 @@ define <16 x i16> @test_masked_16xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi16_perm_mem_mask3:
@@ -382,7 +382,7 @@ define <16 x i16> @test_masked_z_16xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3:
@@ -596,7 +596,7 @@ define <32 x i16> @test_32xi16_perm_mem_
; GENERIC-LABEL: test_32xi16_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mem_mask0:
@@ -613,7 +613,7 @@ define <32 x i16> @test_masked_32xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask0:
@@ -634,7 +634,7 @@ define <32 x i16> @test_masked_z_32xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0:
@@ -655,7 +655,7 @@ define <32 x i16> @test_masked_32xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask1:
@@ -676,7 +676,7 @@ define <32 x i16> @test_masked_z_32xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1:
@@ -697,7 +697,7 @@ define <32 x i16> @test_masked_32xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask2:
@@ -718,7 +718,7 @@ define <32 x i16> @test_masked_z_32xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2:
@@ -738,7 +738,7 @@ define <32 x i16> @test_32xi16_perm_mem_
; GENERIC-LABEL: test_32xi16_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_32xi16_perm_mem_mask3:
@@ -755,7 +755,7 @@ define <32 x i16> @test_masked_32xi16_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_32xi16_perm_mem_mask3:
@@ -776,7 +776,7 @@ define <32 x i16> @test_masked_z_32xi16_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3:
@@ -990,7 +990,7 @@ define <8 x i32> @test_8xi32_perm_mem_ma
; GENERIC-LABEL: test_8xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mem_mask0:
@@ -1007,7 +1007,7 @@ define <8 x i32> @test_masked_8xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask0:
@@ -1028,7 +1028,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0:
@@ -1049,7 +1049,7 @@ define <8 x i32> @test_masked_8xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask1:
@@ -1070,7 +1070,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1:
@@ -1091,7 +1091,7 @@ define <8 x i32> @test_masked_8xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask2:
@@ -1112,7 +1112,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2:
@@ -1132,7 +1132,7 @@ define <8 x i32> @test_8xi32_perm_mem_ma
; GENERIC-LABEL: test_8xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_perm_mem_mask3:
@@ -1149,7 +1149,7 @@ define <8 x i32> @test_masked_8xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi32_perm_mem_mask3:
@@ -1170,7 +1170,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3:
@@ -1384,7 +1384,7 @@ define <16 x i32> @test_16xi32_perm_mem_
; GENERIC-LABEL: test_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mem_mask0:
@@ -1401,7 +1401,7 @@ define <16 x i32> @test_masked_16xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask0:
@@ -1422,7 +1422,7 @@ define <16 x i32> @test_masked_z_16xi32_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0:
@@ -1443,7 +1443,7 @@ define <16 x i32> @test_masked_16xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask1:
@@ -1464,7 +1464,7 @@ define <16 x i32> @test_masked_z_16xi32_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1:
@@ -1485,7 +1485,7 @@ define <16 x i32> @test_masked_16xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask2:
@@ -1506,7 +1506,7 @@ define <16 x i32> @test_masked_z_16xi32_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2:
@@ -1526,7 +1526,7 @@ define <16 x i32> @test_16xi32_perm_mem_
; GENERIC-LABEL: test_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_perm_mem_mask3:
@@ -1543,7 +1543,7 @@ define <16 x i32> @test_masked_16xi32_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi32_perm_mem_mask3:
@@ -1564,7 +1564,7 @@ define <16 x i32> @test_masked_z_16xi32_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3:
@@ -1757,7 +1757,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) {
; GENERIC-LABEL: test_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_perm_mem_mask0:
@@ -1772,7 +1772,7 @@ define <4 x i64> @test_masked_4xi64_perm
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask0:
@@ -1791,7 +1791,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0:
@@ -1810,7 +1810,7 @@ define <4 x i64> @test_masked_4xi64_perm
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask1:
@@ -1829,7 +1829,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1:
@@ -1848,7 +1848,7 @@ define <4 x i64> @test_masked_4xi64_perm
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask2:
@@ -1867,7 +1867,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2:
@@ -1885,7 +1885,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) {
; GENERIC-LABEL: test_4xi64_perm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_perm_mem_mask3:
@@ -1900,7 +1900,7 @@ define <4 x i64> @test_masked_4xi64_perm
; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi64_perm_mem_mask3:
@@ -1919,7 +1919,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3:
@@ -2293,7 +2293,7 @@ define <8 x i64> @test_8xi64_perm_mem_ma
; GENERIC-LABEL: test_8xi64_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_mem_mask0:
@@ -2310,7 +2310,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mem_mask0:
@@ -2331,7 +2331,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0:
@@ -2351,7 +2351,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
@@ -2370,7 +2370,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
@@ -2390,7 +2390,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mem_mask2:
@@ -2411,7 +2411,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2:
@@ -2430,7 +2430,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) {
; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_imm_mem_mask3:
@@ -2445,7 +2445,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
@@ -2464,7 +2464,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
@@ -2484,7 +2484,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mem_mask4:
@@ -2505,7 +2505,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4:
@@ -2525,7 +2525,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
@@ -2544,7 +2544,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
@@ -2563,7 +2563,7 @@ define <8 x i64> @test_8xi64_perm_mem_ma
; GENERIC-LABEL: test_8xi64_perm_mem_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_perm_mem_mask6:
@@ -2580,7 +2580,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_mem_mask6:
@@ -2601,7 +2601,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6:
@@ -2621,7 +2621,7 @@ define <8 x i64> @test_masked_8xi64_perm
; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
@@ -2640,7 +2640,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
+; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
@@ -2853,7 +2853,7 @@ define <8 x float> @test_8xfloat_perm_me
; GENERIC-LABEL: test_8xfloat_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_perm_mem_mask0:
@@ -2870,7 +2870,7 @@ define <8 x float> @test_masked_8xfloat_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0:
@@ -2891,7 +2891,7 @@ define <8 x float> @test_masked_z_8xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
@@ -2912,7 +2912,7 @@ define <8 x float> @test_masked_8xfloat_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1:
@@ -2933,7 +2933,7 @@ define <8 x float> @test_masked_z_8xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
@@ -2954,7 +2954,7 @@ define <8 x float> @test_masked_8xfloat_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2:
@@ -2975,7 +2975,7 @@ define <8 x float> @test_masked_z_8xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
@@ -2995,7 +2995,7 @@ define <8 x float> @test_8xfloat_perm_me
; GENERIC-LABEL: test_8xfloat_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_perm_mem_mask3:
@@ -3012,7 +3012,7 @@ define <8 x float> @test_masked_8xfloat_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3:
@@ -3033,7 +3033,7 @@ define <8 x float> @test_masked_z_8xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
@@ -3247,7 +3247,7 @@ define <16 x float> @test_16xfloat_perm_
; GENERIC-LABEL: test_16xfloat_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_perm_mem_mask0:
@@ -3264,7 +3264,7 @@ define <16 x float> @test_masked_16xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0:
@@ -3285,7 +3285,7 @@ define <16 x float> @test_masked_z_16xfl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
@@ -3306,7 +3306,7 @@ define <16 x float> @test_masked_16xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1:
@@ -3327,7 +3327,7 @@ define <16 x float> @test_masked_z_16xfl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
@@ -3348,7 +3348,7 @@ define <16 x float> @test_masked_16xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2:
@@ -3369,7 +3369,7 @@ define <16 x float> @test_masked_z_16xfl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
@@ -3389,7 +3389,7 @@ define <16 x float> @test_16xfloat_perm_
; GENERIC-LABEL: test_16xfloat_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_perm_mem_mask3:
@@ -3406,7 +3406,7 @@ define <16 x float> @test_masked_16xfloa
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3:
@@ -3427,7 +3427,7 @@ define <16 x float> @test_masked_z_16xfl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
@@ -3620,7 +3620,7 @@ define <4 x double> @test_masked_z_4xdou
define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
; GENERIC-LABEL: test_4xdouble_perm_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_perm_mem_mask0:
@@ -3635,7 +3635,7 @@ define <4 x double> @test_masked_4xdoubl
; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0:
@@ -3654,7 +3654,7 @@ define <4 x double> @test_masked_z_4xdou
; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
@@ -3673,7 +3673,7 @@ define <4 x double> @test_masked_4xdoubl
; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1:
@@ -3692,7 +3692,7 @@ define <4 x double> @test_masked_z_4xdou
; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
@@ -3711,7 +3711,7 @@ define <4 x double> @test_masked_4xdoubl
; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2:
@@ -3730,7 +3730,7 @@ define <4 x double> @test_masked_z_4xdou
; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
@@ -3748,7 +3748,7 @@ define <4 x double> @test_masked_z_4xdou
define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
; GENERIC-LABEL: test_4xdouble_perm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_perm_mem_mask3:
@@ -3763,7 +3763,7 @@ define <4 x double> @test_masked_4xdoubl
; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3:
@@ -3782,7 +3782,7 @@ define <4 x double> @test_masked_z_4xdou
; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
@@ -4156,7 +4156,7 @@ define <8 x double> @test_8xdouble_perm_
; GENERIC-LABEL: test_8xdouble_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_perm_mem_mask0:
@@ -4173,7 +4173,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0:
@@ -4194,7 +4194,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
@@ -4214,7 +4214,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
@@ -4233,7 +4233,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
@@ -4253,7 +4253,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2:
@@ -4274,7 +4274,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
@@ -4293,7 +4293,7 @@ define <8 x double> @test_masked_z_8xdou
define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) {
; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3:
@@ -4308,7 +4308,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
@@ -4327,7 +4327,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
@@ -4347,7 +4347,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4:
@@ -4368,7 +4368,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
@@ -4388,7 +4388,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
@@ -4407,7 +4407,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
@@ -4426,7 +4426,7 @@ define <8 x double> @test_8xdouble_perm_
; GENERIC-LABEL: test_8xdouble_perm_mem_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_perm_mem_mask6:
@@ -4443,7 +4443,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6:
@@ -4464,7 +4464,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
+; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
@@ -4484,7 +4484,7 @@ define <8 x double> @test_masked_8xdoubl
; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
@@ -4503,7 +4503,7 @@ define <8 x double> @test_masked_z_8xdou
; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
+; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
@@ -8941,7 +8941,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8962,7 +8962,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
@@ -8981,7 +8981,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9002,7 +9002,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
@@ -9021,7 +9021,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9042,7 +9042,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
@@ -9075,7 +9075,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9096,7 +9096,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
@@ -9288,7 +9288,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mem_mask0:
@@ -9303,7 +9303,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9324,7 +9324,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
@@ -9343,7 +9343,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9364,7 +9364,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
@@ -9383,7 +9383,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9404,7 +9404,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
@@ -9422,7 +9422,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mem_mask3:
@@ -9437,7 +9437,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9458,7 +9458,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
@@ -9665,7 +9665,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9686,7 +9686,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
@@ -9705,7 +9705,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9726,7 +9726,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
@@ -9745,7 +9745,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9766,7 +9766,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
@@ -9799,7 +9799,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9820,7 +9820,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
@@ -10012,7 +10012,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mem_mask0:
@@ -10027,7 +10027,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10048,7 +10048,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
@@ -10067,7 +10067,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10088,7 +10088,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
@@ -10107,7 +10107,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10128,7 +10128,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
@@ -10146,7 +10146,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mem_mask3:
@@ -10161,7 +10161,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10182,7 +10182,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
@@ -10374,7 +10374,7 @@ define <8 x i32> @test_8xi32_zero_masked
define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; GENERIC-LABEL: test_8xi32_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mem_mask0:
@@ -10389,7 +10389,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10410,7 +10410,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
@@ -10429,7 +10429,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10450,7 +10450,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
@@ -10469,7 +10469,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10490,7 +10490,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
@@ -10508,7 +10508,7 @@ define <8 x i32> @test_8xi32_zero_masked
define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; GENERIC-LABEL: test_8xi32_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mem_mask3:
@@ -10523,7 +10523,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10544,7 +10544,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
@@ -10736,7 +10736,7 @@ define <16 x i32> @test_16xi32_zero_mask
define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
; GENERIC-LABEL: test_16xi32_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mem_mask0:
@@ -10751,7 +10751,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10772,7 +10772,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
@@ -10791,7 +10791,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10812,7 +10812,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
@@ -10831,7 +10831,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10852,7 +10852,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
@@ -10870,7 +10870,7 @@ define <16 x i32> @test_16xi32_zero_mask
define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
; GENERIC-LABEL: test_16xi32_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mem_mask3:
@@ -10885,7 +10885,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10906,7 +10906,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
@@ -11098,7 +11098,7 @@ define <4 x i64> @test_4xi64_zero_masked
define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; GENERIC-LABEL: test_4xi64_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mem_mask0:
@@ -11113,7 +11113,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11134,7 +11134,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
@@ -11153,7 +11153,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11174,7 +11174,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
@@ -11193,7 +11193,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11214,7 +11214,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
@@ -11232,7 +11232,7 @@ define <4 x i64> @test_4xi64_zero_masked
define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; GENERIC-LABEL: test_4xi64_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mem_mask3:
@@ -11247,7 +11247,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11268,7 +11268,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
@@ -11460,7 +11460,7 @@ define <8 x i64> @test_8xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; GENERIC-LABEL: test_8xi64_shuff_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mem_mask0:
@@ -11475,7 +11475,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11496,7 +11496,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
@@ -11515,7 +11515,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11536,7 +11536,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
@@ -11555,7 +11555,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11576,7 +11576,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
@@ -11594,7 +11594,7 @@ define <8 x i64> @test_8xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; GENERIC-LABEL: test_8xi64_shuff_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mem_mask3:
@@ -11609,7 +11609,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11630,7 +11630,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
@@ -11837,7 +11837,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11858,7 +11858,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
@@ -11877,7 +11877,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11898,7 +11898,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
@@ -11917,7 +11917,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11938,7 +11938,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
@@ -11971,7 +11971,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11992,7 +11992,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12836,7 +12836,7 @@ define <2 x double> @test_2xdouble_maske
; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12857,7 +12857,7 @@ define <2 x double> @test_2xdouble_zero_
; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
@@ -12876,7 +12876,7 @@ define <2 x double> @test_2xdouble_maske
; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12897,7 +12897,7 @@ define <2 x double> @test_2xdouble_zero_
; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13828,7 +13828,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13849,7 +13849,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
@@ -13868,7 +13868,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13889,7 +13889,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
@@ -13908,7 +13908,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13929,7 +13929,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
@@ -13962,7 +13962,7 @@ define <4 x float> @test_4xfloat_masked_
; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13983,7 +13983,7 @@ define <4 x float> @test_4xfloat_zero_ma
; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14827,7 +14827,7 @@ define <2 x double> @test_2xdouble_maske
; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14848,7 +14848,7 @@ define <2 x double> @test_2xdouble_zero_
; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
@@ -14867,7 +14867,7 @@ define <2 x double> @test_2xdouble_maske
; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14888,7 +14888,7 @@ define <2 x double> @test_2xdouble_zero_
; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331380&r1=331379&r2=331380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Wed May 2 10:58:50 2018
@@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x do
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x do
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x fl
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x fl
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
More information about the llvm-commits mailing list