[llvm] r331659 - [X86][AVX2] Tag VPMOVSX/VPMOVZX ymm instructions as WriteShuffle256
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 7 11:25:19 PDT 2018
Author: rksimon
Date: Mon May 7 11:25:19 2018
New Revision: 331659
URL: http://llvm.org/viewvc/llvm-project?rev=331659&view=rev
Log:
[X86][AVX2] Tag VPMOVSX/VPMOVZX ymm instructions as WriteShuffle256
These instructions are more like cross-lane shuffles than regular shuffles; we already tag the AVX512 equivalents the same way.
Differential Revision: https://reviews.llvm.org/D46229
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon May 7 11:25:19 2018
@@ -4881,26 +4881,29 @@ multiclass SS41I_pmovx_rrrm<bits<8> opc,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[sched.Folded]>;
}
-// FIXME: YMM cases should use SchedWriteShuffle.YMM.
+
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
X86MemOperand MemOp, X86MemOperand MemYOp,
- X86SchedWriteWidths sched, Predicate prd> {
- defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched.XMM>;
+ Predicate prd> {
+ defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
+ SchedWriteShuffle.XMM>;
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
- VR128, VR128, sched.XMM>, VEX, VEX_WIG;
+ VR128, VR128, SchedWriteShuffle.XMM>,
+ VEX, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
- VR256, VR128, sched.XMM>, VEX, VEX_L, VEX_WIG;
+ VR256, VR128, WriteShuffle256>,
+ VEX, VEX_L, VEX_WIG;
}
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
X86MemOperand MemYOp, Predicate prd> {
defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
- MemOp, MemYOp, SchedWriteShuffle, prd>;
+ MemOp, MemYOp, prd>;
defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
!strconcat("pmovzx", OpcodeStr),
- MemOp, MemYOp, SchedWriteShuffle, prd>;
+ MemOp, MemYOp, prd>;
}
defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon May 7 11:25:19 2018
@@ -687,19 +687,7 @@ def BWWriteResGroup28 : SchedWriteRes<[B
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
- "VPBROADCASTWrr",
- "VPMOVSXBDYrr",
- "VPMOVSXBQYrr",
- "VPMOVSXBWYrr",
- "VPMOVSXDQYrr",
- "VPMOVSXWDYrr",
- "VPMOVSXWQYrr",
- "VPMOVZXBDYrr",
- "VPMOVZXBQYrr",
- "VPMOVZXBWYrr",
- "VPMOVZXDQYrr",
- "VPMOVZXWDYrr",
- "VPMOVZXWQYrr")>;
+ "VPBROADCASTWrr")>;
def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon May 7 11:25:19 2018
@@ -1287,19 +1287,7 @@ def HWWriteResGroup51 : SchedWriteRes<[H
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr",
- "VPBROADCASTWrr",
- "VPMOVSXBDYrr",
- "VPMOVSXBQYrr",
- "VPMOVSXBWYrr",
- "VPMOVSXDQYrr",
- "VPMOVSXWDYrr",
- "VPMOVSXWQYrr",
- "VPMOVZXBDYrr",
- "VPMOVZXBQYrr",
- "VPMOVZXBWYrr",
- "VPMOVZXDQYrr",
- "VPMOVZXWDYrr",
- "VPMOVZXWQYrr")>;
+ "VPBROADCASTWrr")>;
def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 9;
@@ -1320,17 +1308,6 @@ def: InstRW<[HWWriteResGroup52_1], (inst
"VCVTPS2DQYrm",
"VCVTTPS2DQYrm")>;
-def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
- "VPMOVZXBQYrm",
- "VPMOVZXBWYrm",
- "VPMOVZXDQYrm",
- "VPMOVZXWQYrm")>;
-
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon May 7 11:25:19 2018
@@ -731,19 +731,7 @@ def: InstRW<[SKLWriteResGroup30], (instr
"(ADD|SUB|SUBR)_FrST0",
"VPBROADCASTBrr",
"VPBROADCASTWrr",
- "(V?)PCMPGTQ(Y?)rr",
- "VPMOVSXBDYrr",
- "VPMOVSXBQYrr",
- "VPMOVSXBWYrr",
- "VPMOVSXDQYrr",
- "VPMOVSXWDYrr",
- "VPMOVSXWQYrr",
- "VPMOVZXBDYrr",
- "VPMOVZXBQYrr",
- "VPMOVZXBWYrr",
- "VPMOVZXDQYrr",
- "VPMOVZXWDYrr",
- "VPMOVZXWQYrr")>;
+ "(V?)PCMPGTQ(Y?)rr")>;
def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
@@ -1558,12 +1546,7 @@ def SKLWriteResGroup133 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
- "VPCMPGTQYrm",
- "VPMOVZXBDYrm",
- "VPMOVZXBQYrm",
- "VPMOVZXBWYrm",
- "VPMOVZXDQYrm",
- "VPMOVZXWQYrm")>;
+ "VPCMPGTQYrm")>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon May 7 11:25:19 2018
@@ -1062,18 +1062,6 @@ def: InstRW<[SKXWriteResGroup32], (instr
"VPMINUQZ128rr",
"VPMINUQZ256rr",
"VPMINUQZrr",
- "VPMOVSXBDYrr",
- "VPMOVSXBQYrr",
- "VPMOVSXBWYrr",
- "VPMOVSXDQYrr",
- "VPMOVSXWDYrr",
- "VPMOVSXWQYrr",
- "VPMOVZXBDYrr",
- "VPMOVZXBQYrr",
- "VPMOVZXBWYrr",
- "VPMOVZXDQYrr",
- "VPMOVZXWDYrr",
- "VPMOVZXWQYrr",
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTESTMBZ128rr",
"VPTESTMBZ256rr",
@@ -2603,11 +2591,6 @@ def: InstRW<[SKXWriteResGroup148], (inst
"VPMINSQZrm(b?)",
"VPMINUQZ256rm(b?)",
"VPMINUQZrm(b?)",
- "VPMOVZXBDYrm",
- "VPMOVZXBQYrm",
- "VPMOVZXBWYrm",
- "VPMOVZXDQYrm",
- "VPMOVZXWQYrm",
"VPTESTMBZ256rm(b?)",
"VPTESTMBZrm(b?)",
"VPTESTMDZ256rm(b?)",
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon May 7 11:25:19 2018
@@ -940,15 +940,20 @@ def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU
let NumMicroOps = 2;
}
def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ;
+def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr",
"MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm",
"MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
-// VPMOVSX/ZX BW BD BQ DW DQ.
+// VPMOVSX/ZX BW BD BQ WD WQ DQ.
// y <- x.
-def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
+def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
+def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Mon May 7 11:25:19 2018
@@ -4138,7 +4138,7 @@ define <8 x i32> @test_pmovsxbd(<16 x i8
; GENERIC-LABEL: test_pmovsxbd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4173,7 +4173,7 @@ define <8 x i32> @test_pmovsxbd(<16 x i8
; ZNVER1-LABEL: test_pmovsxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
+; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -4189,7 +4189,7 @@ define <4 x i64> @test_pmovsxbq(<16 x i8
; GENERIC-LABEL: test_pmovsxbq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4240,7 +4240,7 @@ define <16 x i16> @test_pmovsxbw(<16 x i
; GENERIC-LABEL: test_pmovsxbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4289,7 +4289,7 @@ define <4 x i64> @test_pmovsxdq(<4 x i32
; GENERIC-LABEL: test_pmovsxdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4338,7 +4338,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16
; GENERIC-LABEL: test_pmovsxwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4373,7 +4373,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16
; ZNVER1-LABEL: test_pmovsxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
+; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = sext <8 x i16> %a0 to <8 x i32>
@@ -4387,7 +4387,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16
; GENERIC-LABEL: test_pmovsxwq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4422,7 +4422,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16
; ZNVER1-LABEL: test_pmovsxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
+; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -4438,7 +4438,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8
; GENERIC-LABEL: test_pmovzxbd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4473,7 +4473,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8
; ZNVER1-LABEL: test_pmovzxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
+; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50]
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -4489,7 +4489,7 @@ define <4 x i64> @test_pmovzxbq(<16 x i8
; GENERIC-LABEL: test_pmovzxbq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4540,7 +4540,7 @@ define <16 x i16> @test_pmovzxbw(<16 x i
; GENERIC-LABEL: test_pmovzxbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4589,7 +4589,7 @@ define <4 x i64> @test_pmovzxdq(<4 x i32
; GENERIC-LABEL: test_pmovzxdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4638,7 +4638,7 @@ define <8 x i32> @test_pmovzxwd(<8 x i16
; GENERIC-LABEL: test_pmovzxwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4673,7 +4673,7 @@ define <8 x i32> @test_pmovzxwd(<8 x i16
; ZNVER1-LABEL: test_pmovzxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
+; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = zext <8 x i16> %a0 to <8 x i32>
@@ -4687,7 +4687,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16
; GENERIC-LABEL: test_pmovzxwq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4722,7 +4722,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16
; ZNVER1-LABEL: test_pmovzxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
+; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Mon May 7 11:25:19 2018
@@ -3487,7 +3487,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
; GENERIC-LABEL: sext_4x8mem_to_4x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x64:
@@ -3653,7 +3653,7 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x16mem_to_8x32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x32:
@@ -3883,7 +3883,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_4x16mem_to_4x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x64:
@@ -4081,7 +4081,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
; GENERIC-LABEL: sext_4x32mem_to_4x64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32mem_to_4x64:
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s?rev=331659&r1=331658&r2=331659&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx2.s Mon May 7 11:25:19 2018
@@ -620,30 +620,30 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx
-# CHECK-NEXT: 1 1 0.25 vpmovsxbd %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.25 vpmovsxwd %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.25 vpmovsxwq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.25 vpmovzxbd %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.25 vpmovzxwd %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.25 vpmovzxwq %xmm0, %ymm2
-# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2
+# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2
+# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmulhrsw %ymm0, %ymm1, %ymm2
@@ -773,7 +773,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 63.50 63.50 - - - - - 77.17 70.67 80.00 47.17 -
+# CHECK-NEXT: 63.50 63.50 - - - - - 72.67 75.17 84.50 42.67 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -940,30 +940,30 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpminuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpminuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 2.00 - - vpmovmskb %ymm0, %ecx
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbd (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbq (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbq (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbw %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbw (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbw (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxdq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxdq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxdq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbd (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbq (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbq (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbw %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbw (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbw (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxdq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxdq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq (%rax), %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxdq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmulhrsw %ymm0, %ymm1, %ymm2
More information about the llvm-commits mailing list