[llvm] r331264 - [X86] Split WriteFShuffle into XMM and YMM/ZMM scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 1 07:25:02 PDT 2018
Author: rksimon
Date: Tue May 1 07:25:01 2018
New Revision: 331264
URL: http://llvm.org/viewvc/llvm-project?rev=331264&view=rev
Log:
[X86] Split WriteFShuffle into XMM and YMM/ZMM scheduler classes
Removes more WriteFShuffle InstRW overrides
Modified:
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 1 07:25:01 2018
@@ -168,7 +168,8 @@ defm : BWWriteResPair<WriteFMAY, [BWPo
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
@@ -1088,8 +1089,6 @@ def: InstRW<[BWWriteResGroup75], (instre
"VPACKUSWBYrm",
"VPALIGNRYrmi",
"VPBLENDWYrmi",
- "VPERMILPDYmi",
- "VPERMILPSYmi",
"VPSHUFBYrm",
"VPSHUFDYmi",
"VPSHUFHWYmi",
@@ -1101,13 +1100,7 @@ def: InstRW<[BWWriteResGroup75], (instre
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 7;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 1 07:25:01 2018
@@ -165,7 +165,8 @@ defm : HWWriteResPair<WriteFMAY, [HWPor
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
@@ -900,8 +901,6 @@ def: InstRW<[HWWriteResGroup13_1], (inst
"VPACKUSWBYrm",
"VPALIGNRYrmi",
"VPBLENDWYrmi",
- "VPERMILPDYmi",
- "VPERMILPSYmi",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@@ -916,13 +915,7 @@ def: InstRW<[HWWriteResGroup13_1], (inst
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 1 07:25:01 2018
@@ -152,7 +152,8 @@ defm : SBWriteResPair<WriteCvtF2F, [SBPo
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
@@ -1140,16 +1141,8 @@ def SBWriteResGroup73 : SchedWriteRes<[S
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPSYmi",
- "VPERMILPSYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPERMILPSYrm")>;
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 1 07:25:01 2018
@@ -165,7 +165,8 @@ defm : SKLWriteResPair<WriteFMAY, [SKL
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -1519,9 +1520,7 @@ def: InstRW<[SKLWriteResGroup108], (inst
"VPBLENDWYrmi",
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPSYmi",
"VPERMILPSYrm",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
@@ -1537,13 +1536,7 @@ def: InstRW<[SKLWriteResGroup108], (inst
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 8;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 1 07:25:01 2018
@@ -165,7 +165,8 @@ defm : SKXWriteResPair<WriteFMAY, [SKXPo
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -2703,17 +2704,11 @@ def: InstRW<[SKXWriteResGroup119], (inst
"VPBROADCASTWYrm",
"VPBROADCASTWZ256m(b?)",
"VPBROADCASTWZm(b?)",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPDZ256m(b?)i",
"VPERMILPDZ256rm(b?)",
- "VPERMILPDZm(b?)i",
"VPERMILPDZrm(b?)",
- "VPERMILPSYmi",
"VPERMILPSYrm",
- "VPERMILPSZ256m(b?)i",
"VPERMILPSZ256rm(b?)",
- "VPERMILPSZm(b?)i",
"VPERMILPSZrm(b?)",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
@@ -2757,25 +2752,7 @@ def: InstRW<[SKXWriteResGroup119], (inst
"VPUNPCKLQDQZrm(b?)",
"VPUNPCKLWDYrm",
"VPUNPCKLWDZ256rm(b?)",
- "VPUNPCKLWDZrm(b?)",
- "VSHUFPDYrmi",
- "VSHUFPDZ256rm(b?)i",
- "VSHUFPDZrm(b?)i",
- "VSHUFPSYrmi",
- "VSHUFPSZ256rm(b?)i",
- "VSHUFPSZrm(b?)i",
- "VUNPCKHPDYrm",
- "VUNPCKHPDZ256rm(b?)",
- "VUNPCKHPDZrm(b?)",
- "VUNPCKHPSYrm",
- "VUNPCKHPSZ256rm(b?)",
- "VUNPCKHPSZrm(b?)",
- "VUNPCKLPDYrm",
- "VUNPCKLPDZ256rm(b?)",
- "VUNPCKLPDZrm(b?)",
- "VUNPCKLPSYrm",
- "VUNPCKLPSZ256rm(b?)",
- "VUNPCKLPSZrm(b?)")>;
+ "VPUNPCKLWDZrm(b?)")>;
def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> {
let Latency = 8;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 1 07:25:01 2018
@@ -105,6 +105,7 @@ defm WriteFSign : X86SchedWritePair; //
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
@@ -212,7 +213,7 @@ def SchedWriteFLogic
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
- WriteFShuffle, WriteFShuffle>;
+ WriteFShuffleY, WriteFShuffleY>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleY>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 1 07:25:01 2018
@@ -214,6 +214,7 @@ defm : AtomWriteResPair<WriteFSign,
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteFShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 1 07:25:01 2018
@@ -330,6 +330,7 @@ defm : JWriteResFpuPair<WriteFSign,
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
+defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
@@ -685,27 +686,6 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAG
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
-def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
- VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
- VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
- VUNPCKLPDYrr, VUNPCKLPSYrr)>;
-
-def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm,
- VMOVSLDUPYrm, VPERMILPDYmi,
- VPERMILPSYmi, VSHUFPDYrmi,
- VSHUFPSYrmi, VUNPCKHPDYrm,
- VUNPCKHPSYrm, VUNPCKLPDYrm,
- VUNPCKLPSYrm)>;
-
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 1 07:25:01 2018
@@ -143,7 +143,8 @@ defm : SLMWriteResPair<WriteCvtF2F, [SLM
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 1 07:25:01 2018
@@ -209,6 +209,7 @@ defm : ZnWriteResFpuPair<WriteFSign,
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=331264&r1=331263&r2=331264&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Tue May 1 07:25:01 2018
@@ -8572,7 +8572,7 @@ define <16 x i32> @test2_masked_z_16xi32
define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; GENERIC-LABEL: test2_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
+; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_16xi32_perm_mem_mask0:
@@ -8700,7 +8700,7 @@ define <16 x i32> @test2_masked_z_16xi32
define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; GENERIC-LABEL: test2_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
+; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_16xi32_perm_mem_mask3:
@@ -12199,7 +12199,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12220,7 +12220,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12239,7 +12239,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12260,7 +12260,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12279,7 +12279,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12300,7 +12300,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12333,7 +12333,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12354,7 +12354,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12546,7 +12546,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0:
@@ -12561,7 +12561,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12582,7 +12582,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12601,7 +12601,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12622,7 +12622,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12641,7 +12641,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12662,7 +12662,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12680,7 +12680,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3:
@@ -12695,7 +12695,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12716,7 +12716,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
@@ -13104,7 +13104,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13125,7 +13125,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13144,7 +13144,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13165,7 +13165,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13184,7 +13184,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13205,7 +13205,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13238,7 +13238,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13259,7 +13259,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
@@ -13451,7 +13451,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0:
@@ -13466,7 +13466,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13487,7 +13487,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13506,7 +13506,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13527,7 +13527,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13546,7 +13546,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13567,7 +13567,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13585,7 +13585,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3:
@@ -13600,7 +13600,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13621,7 +13621,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
@@ -14190,7 +14190,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14211,7 +14211,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14230,7 +14230,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14251,7 +14251,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14270,7 +14270,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14291,7 +14291,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14324,7 +14324,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14345,7 +14345,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14537,7 +14537,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0:
@@ -14552,7 +14552,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14573,7 +14573,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14592,7 +14592,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14613,7 +14613,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14632,7 +14632,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14653,7 +14653,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14671,7 +14671,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3:
@@ -14686,7 +14686,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14707,7 +14707,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
@@ -15095,7 +15095,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15116,7 +15116,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15135,7 +15135,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15156,7 +15156,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15175,7 +15175,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15196,7 +15196,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15229,7 +15229,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15250,7 +15250,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
@@ -15442,7 +15442,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
@@ -15457,7 +15457,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15478,7 +15478,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15497,7 +15497,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15518,7 +15518,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15537,7 +15537,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15558,7 +15558,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15576,7 +15576,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
@@ -15591,7 +15591,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15612,7 +15612,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
More information about the llvm-commits
mailing list