[llvm] r328914 - [X86] Add SchedRW for PMULLD

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 30 21:54:32 PDT 2018


Author: ctopper
Date: Fri Mar 30 21:54:32 2018
New Revision: 328914

URL: http://llvm.org/viewvc/llvm-project?rev=328914&view=rev
Log:
[X86] Add SchedRW for PMULLD

Summary:
It seems many CPUs don't implement this instruction as well as the other vector multiplies, often using a multi-uop flow. Silvermont in particular has a 7-uop flow with 11-cycle throughput. Sandy Bridge implements it as a single uop with 5-cycle latency and 1-cycle throughput, but Haswell and later use 2 uops with 10-cycle latency and 2-cycle throughput.

This patch adds a new X86SchedWritePair we can use to tag this instruction separately. I've provided correct information for Silvermont, Btver2, and Sandy Bridge. I've removed the InstRWs for SandyBridge. I've left Haswell/Broadwell/Skylake InstRWs in place because I wasn't sure how to account for the different load latency between 128 and 256 bits. I also left Znver1 InstRWs in place because the existing values don't match Agner's spreadsheet.

I also left a FIXME in the SandyBridge model: because that model is also used as the "generic" model, its values are too optimistic for the 256/512-bit versions, which are multiple uops on all known CPUs.

Reviewers: RKSimon, GGanesh, courbet

Reviewed By: RKSimon

Subscribers: gchatelet, gbedwell, andreadb, llvm-commits

Differential Revision: https://reviews.llvm.org/D44972

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/slow-pmulld.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Mar 30 21:54:32 2018
@@ -4505,7 +4505,7 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xD
 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                      SSE_INTALU_ITINS_P, HasBWI, 0>;
 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
-                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
+                                    SSE_PMULLD_ITINS, HasAVX512, 1>, T8PD;
 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                     SSE_INTMUL_ITINS_P, HasBWI, 1>;
 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Mar 30 21:54:32 2018
@@ -195,7 +195,7 @@ def SSE_MPSADBW_ITINS : OpndItins<
   IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
 >;
 
-let Sched = WriteVecIMul in
+let Sched = WritePMULLD in
 def SSE_PMULLD_ITINS : OpndItins<
   IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
 >;

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Fri Mar 30 21:54:32 2018
@@ -167,6 +167,7 @@ def  : WriteRes<WriteVecMove,        [BW
 defm : BWWriteResPair<WriteVecALU,   [BWPort15],  1>; // Vector integer ALU op, no logicals.
 defm : BWWriteResPair<WriteVecShift, [BWPort0],  1>; // Vector integer shifts.
 defm : BWWriteResPair<WriteVecIMul,  [BWPort0],   5>; // Vector integer multiply.
+defm : BWWriteResPair<WritePMULLD,   [BWPort0], 10, [2], 2, 5>; // PMULLD
 defm : BWWriteResPair<WriteShuffle,  [BWPort5],  1>; // Vector shuffles.
 defm : BWWriteResPair<WriteBlend,  [BWPort15],  1>; // Vector blends.
 defm : BWWriteResPair<WriteVarBlend,  [BWPort5], 2, [2]>; // Vector variable blends.
@@ -2180,13 +2181,6 @@ def BWWriteResGroup113 : SchedWriteRes<[
 def: InstRW<[BWWriteResGroup113], (instregex "LAR(16|32|64)rm",
                                              "LSL(16|32|64)rm")>;
 
-def BWWriteResGroup114 : SchedWriteRes<[BWPort0]> {
-  let Latency = 10;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[BWWriteResGroup114], (instregex "(V?)PMULLD(Y?)rr")>;
-
 def BWWriteResGroup115 : SchedWriteRes<[BWPort0,BWPort23]> {
   let Latency = 10;
   let NumMicroOps = 2;
@@ -2462,13 +2456,6 @@ def: InstRW<[BWWriteResGroup147], (instr
                                              "DIVR_FST0r",
                                              "DIVR_FrST0")>;
 
-def BWWriteResGroup148 : SchedWriteRes<[BWPort0,BWPort23]> {
-  let Latency = 15;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup148], (instregex "(V?)PMULLDrm")>;
-
 def BWWriteResGroup149 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
   let Latency = 15;
   let NumMicroOps = 10;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Fri Mar 30 21:54:32 2018
@@ -163,6 +163,7 @@ defm : HWWriteResPair<WriteVecShift, [HW
 defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1>;
 defm : HWWriteResPair<WriteVecALU,   [HWPort15],  1>;
 defm : HWWriteResPair<WriteVecIMul,  [HWPort0],   5>;
+defm : HWWriteResPair<WritePMULLD,   [HWPort0], 10, [2], 2, 6>;
 defm : HWWriteResPair<WriteShuffle,  [HWPort5],  1>;
 defm : HWWriteResPair<WriteBlend,  [HWPort15],  1>;
 defm : HWWriteResPair<WriteShuffle256,  [HWPort5],  3>;
@@ -2680,20 +2681,6 @@ def HWWriteResGroup117 : SchedWriteRes<[
 }
 def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>;
 
-def HWWriteResGroup118 : SchedWriteRes<[HWPort0]> {
-  let Latency = 10;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[HWWriteResGroup118], (instregex "(V?)PMULLD(Y?)rr")>;
-
-def HWWriteResGroup119 : SchedWriteRes<[HWPort0,HWPort23]> {
-  let Latency = 16;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup119], (instregex "(V?)PMULLDrm")>;
-
 def HWWriteResGroup119_1 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 17;
   let NumMicroOps = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri Mar 30 21:54:32 2018
@@ -151,6 +151,7 @@ defm : SBWriteResPair<WriteVecShift, [SB
 defm : SBWriteResPair<WriteVecLogic, [SBPort5], 1>;
 defm : SBWriteResPair<WriteVecALU,   [SBPort1], 3>;
 defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5>;
+defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
 defm : SBWriteResPair<WriteShuffle,  [SBPort5], 1>;
 defm : SBWriteResPair<WriteBlend,   [SBPort15], 1>;
 defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>;
@@ -672,7 +673,6 @@ def: InstRW<[SBWriteResGroup20], (instre
                                             "(V?)PMULHRSWrr",
                                             "(V?)PMULHUWrr",
                                             "(V?)PMULHWrr",
-                                            "(V?)PMULLDrr",
                                             "(V?)PMULLWrr",
                                             "(V?)PMULUDQrr",
                                             "(V?)PSADBWrr")>;
@@ -1602,7 +1602,6 @@ def: InstRW<[SBWriteResGroup89], (instre
                                             "(V?)PMULHRSWrm",
                                             "(V?)PMULHUWrm",
                                             "(V?)PMULHWrm",
-                                            "(V?)PMULLDrm",
                                             "(V?)PMULLWrm",
                                             "(V?)PMULUDQrm",
                                             "(V?)PSADBWrm")>;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Fri Mar 30 21:54:32 2018
@@ -164,6 +164,7 @@ def  : WriteRes<WriteVecMove,         [S
 defm : SKLWriteResPair<WriteVecALU,   [SKLPort15],  1>; // Vector integer ALU op, no logicals.
 defm : SKLWriteResPair<WriteVecShift, [SKLPort0],  1>; // Vector integer shifts.
 defm : SKLWriteResPair<WriteVecIMul,  [SKLPort0],   5>; // Vector integer multiply.
+defm : SKLWriteResPair<WritePMULLD,   [SKLPort01], 10, [2], 2, 6>;
 defm : SKLWriteResPair<WriteShuffle,  [SKLPort5],  1>; // Vector shuffles.
 defm : SKLWriteResPair<WriteBlend,  [SKLPort15],  1>; // Vector blends.
 defm : SKLWriteResPair<WriteVarBlend,  [SKLPort5], 2, [2]>; // Vector variable blends.
@@ -1849,13 +1850,6 @@ def: InstRW<[SKLWriteResGroup105], (inst
                                               "(V?)ROUNDSDr",
                                               "(V?)ROUNDSSr")>;
 
-def SKLWriteResGroup105_2 : SchedWriteRes<[SKLPort01]> {
-  let Latency = 10;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[SKLWriteResGroup105_2], (instregex "(V?)PMULLD(Y?)rr")>;
-
 def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 8;
   let NumMicroOps = 2;
@@ -2559,13 +2553,6 @@ def: InstRW<[SKLWriteResGroup168], (inst
 def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSDm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSSm")>;
 
-def SKLWriteResGroup168_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
-  let Latency = 16;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup168_2], (instregex "(V?)PMULLDrm")>;
-
 def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 14;
   let NumMicroOps = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Fri Mar 30 21:54:32 2018
@@ -164,6 +164,7 @@ def  : WriteRes<WriteVecMove,         [S
 defm : SKXWriteResPair<WriteVecALU,   [SKXPort15],  1>; // Vector integer ALU op, no logicals.
 defm : SKXWriteResPair<WriteVecShift, [SKXPort0],  1>; // Vector integer shifts.
 defm : SKXWriteResPair<WriteVecIMul,  [SKXPort0],   5>; // Vector integer multiply.
+defm : SKXWriteResPair<WritePMULLD,   [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
 defm : SKXWriteResPair<WriteShuffle,  [SKXPort5],  1>; // Vector shuffles.
 defm : SKXWriteResPair<WriteBlend,  [SKXPort15],  1>; // Vector blends.
 defm : SKXWriteResPair<WriteVarBlend,  [SKXPort5], 2, [2]>; // Vector variable blends.

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri Mar 30 21:54:32 2018
@@ -100,6 +100,7 @@ def  WriteVecMove  : SchedWrite;
 defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
 defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
 defm WriteVecIMul  : X86SchedWritePair; // Vector integer multiply.
+defm WritePMULLD : X86SchedWritePair; // PMULLD
 defm WriteShuffle  : X86SchedWritePair; // Vector shuffles.
 defm WriteBlend  : X86SchedWritePair; // Vector blends.
 defm WriteVarBlend  : X86SchedWritePair; // Vector variable blends.

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Mar 30 21:54:32 2018
@@ -345,6 +345,7 @@ def  : WriteRes<WriteVecMove,
 defm : JWriteResFpuPair<WriteVecALU,      [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecShift,    [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecIMul,     [JFPU0, JVIMUL], 2>;
+defm : JWriteResFpuPair<WritePMULLD,      [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
 defm : JWriteResFpuPair<WriteMPSAD,       [JFPU0, JVIMUL], 3, [1, 2]>;
 defm : JWriteResFpuPair<WriteShuffle,     [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteBlend,       [JFPU01, JVALU], 1>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Fri Mar 30 21:54:32 2018
@@ -138,6 +138,7 @@ defm : SLMWriteResPair<WriteVecShift, [S
 defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecALU,   [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecIMul,  [SLM_FPC_RSV0],   4>;
+defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   11, [11], 7>;
 defm : SLMWriteResPair<WriteShuffle,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteBlend,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteMPSAD,  [SLM_FPC_RSV0],  7>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Fri Mar 30 21:54:32 2018
@@ -213,6 +213,7 @@ defm : ZnWriteResFpuPair<WriteVecLogic,
 defm : ZnWriteResFpuPair<WritePHAdd,      [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecIMul,    [ZnFPU0],  4>;
+defm : ZnWriteResFpuPair<WritePMULLD,     [ZnFPU0],  4>; // FIXME
 defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;
 defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri Mar 30 21:54:32 2018
@@ -4911,7 +4911,7 @@ define <8 x i32> @test_pmulld(<8 x i32>
 ; GENERIC-LABEL: test_pmulld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulld:

Modified: llvm/trunk/test/CodeGen/X86/slow-pmulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/slow-pmulld.ll?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/slow-pmulld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/slow-pmulld.ll Fri Mar 30 21:54:32 2018
@@ -1215,34 +1215,32 @@ define <8 x i32> @test_mul_v8i32_v8i16_m
 define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
 ; SLM32-LABEL: test_mul_v16i32_v16i16_minsize:
 ; SLM32:       # %bb.0:
-; SLM32-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; SLM32-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM32-NEXT:    movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
+; SLM32-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SLM32-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
 ; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SLM32-NEXT:    movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
-; SLM32-NEXT:    pmulld %xmm1, %xmm4
-; SLM32-NEXT:    pmulld %xmm1, %xmm0
-; SLM32-NEXT:    pmulld %xmm1, %xmm2
-; SLM32-NEXT:    pmulld %xmm1, %xmm3
-; SLM32-NEXT:    movdqa %xmm4, %xmm1
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SLM32-NEXT:    pmulld %xmm5, %xmm0
+; SLM32-NEXT:    pmulld %xmm5, %xmm2
+; SLM32-NEXT:    pmulld %xmm5, %xmm1
+; SLM32-NEXT:    pmulld %xmm5, %xmm3
 ; SLM32-NEXT:    retl
 ;
 ; SLM64-LABEL: test_mul_v16i32_v16i16_minsize:
 ; SLM64:       # %bb.0:
-; SLM64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; SLM64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM64-NEXT:    movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
+; SLM64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SLM64-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
 ; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SLM64-NEXT:    movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
-; SLM64-NEXT:    pmulld %xmm1, %xmm4
-; SLM64-NEXT:    pmulld %xmm1, %xmm0
-; SLM64-NEXT:    pmulld %xmm1, %xmm2
-; SLM64-NEXT:    pmulld %xmm1, %xmm3
-; SLM64-NEXT:    movdqa %xmm4, %xmm1
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SLM64-NEXT:    pmulld %xmm5, %xmm0
+; SLM64-NEXT:    pmulld %xmm5, %xmm2
+; SLM64-NEXT:    pmulld %xmm5, %xmm1
+; SLM64-NEXT:    pmulld %xmm5, %xmm3
 ; SLM64-NEXT:    retq
 ;
 ; SLOW32-LABEL: test_mul_v16i32_v16i16_minsize:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Fri Mar 30 21:54:32 2018
@@ -4817,8 +4817,8 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ;
 ; SLM-LABEL: test_pmulld:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT:    pmulld %xmm1, %xmm0 # sched: [11:11.00]
+; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [14:11.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: test_pmulld:
@@ -4883,14 +4883,14 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ;
 ; BTVER2-SSE-LABEL: test_pmulld:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [9:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_pmulld:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
+; BTVER2-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: test_pmulld:

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Fri Mar 30 21:54:32 2018
@@ -19,7 +19,7 @@ vsqrtps     %ymm0, %ymm2
 
 # CHECK:      Iterations:     70
 # CHECK-NEXT: Instructions:   560
-# CHECK-NEXT: Total Cycles:   4415
+# CHECK-NEXT: Total Cycles:   4416
 # CHECK-NEXT: Dispatch Width: 2
 # CHECK-NEXT: IPC:            0.13
 
@@ -33,7 +33,7 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: [6]: HasSideEffects
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]	Instructions:
-# CHECK-NEXT:  1      2     1.00                    	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      4     2.00                    	vpmulld	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.50                    	vpand	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                    	vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  1      2     1.00                    	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
@@ -62,12 +62,11 @@ vsqrtps     %ymm0, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   
-# CHECK-NEXT:  -      -      -     3.00   63.00  5.00   5.00    -      -      -     1.00   0.50   0.50   2.00   
-
+# CHECK-NEXT:  -      -      -     3.00   63.00  6.01   5.99    -      -      -     1.00   1.00   1.00   3.00   
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions:
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -     0.50   0.50    -     	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -     2.00   1.00    -      -      -      -     0.03   0.97   2.00   	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -     0.01   0.99    -      -      -      -     0.97   0.03    -     	vpand	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -     	vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     	vaddps	%xmm0, %xmm1, %xmm2
@@ -80,20 +79,20 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT:      	          0123456789          0123456789          0123456789        
 # CHECK-NEXT: Index	0123456789          0123456789          0123456789          01234567
 
-# CHECK:      [0,0]	DeeER.    .    .    .    .    .    .    .    .    .    .    .    . .	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,1]	DeE-R.    .    .    .    .    .    .    .    .    .    .    .    . .	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,2]	.DeeeER   .    .    .    .    .    .    .    .    .    .    .    . .	vcvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: [0,3]	.DeeE-R   .    .    .    .    .    .    .    .    .    .    .    . .	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,4]	. DeeeER  .    .    .    .    .    .    .    .    .    .    .    . .	vaddps	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,5]	. DeeeeeeeeeeeeeeeeeeeeeER    .    .    .    .    .    .    .    . .	vsqrtps	%xmm0, %xmm2
-# CHECK-NEXT: [0,6]	.  DeeeE-----------------R    .    .    .    .    .    .    .    . .	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: [0,7]	.   D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER	vsqrtps	%ymm0, %ymm2
-
-# CHECK:      [1,0]	.    DeeE----------------------------------------------------------R	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,1]	.    DeE-----------------------------------------------------------R	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,2]	.    .DeeeE--------------------------------------------------------R	vcvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: [1,3]	.    .DeeE---------------------------------------------------------R	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,4]	.    . DeeeE-------------------------------------------------------R	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK:      [0,0]	DeeeeER   .    .    .    .    .    .    .    .    .    .    .    . .	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1]	.DeE--R   .    .    .    .    .    .    .    .    .    .    .    . .	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,2]	. DeeeER  .    .    .    .    .    .    .    .    .    .    .    . .	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: [0,3]	.  DeeE-R .    .    .    .    .    .    .    .    .    .    .    . .	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4]	.  DeeeER .    .    .    .    .    .    .    .    .    .    .    . .	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5]	.   DeeeeeeeeeeeeeeeeeeeeeER  .    .    .    .    .    .    .    . .	vsqrtps	%xmm0, %xmm2
+# CHECK-NEXT: [0,6]	.    DeeeE-----------------R  .    .    .    .    .    .    .    . .	vaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,7]	.     D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER	vsqrtps	%ymm0, %ymm2
+
+# CHECK:      [1,0]	.    .DeeeeE--------------------------------------------------------R	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1]	.    . DeE----------------------------------------------------------R	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,2]	.    .  DeeeE-------------------------------------------------------R	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: [1,3]	.    .  DeeE--------------------------------------------------------R	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,4]	.    .   DeeeE------------------------------------------------------R	vaddps	%xmm0, %xmm1, %xmm2
 
 
 # CHECK:      Average Wait times (based on the timeline view):
@@ -103,11 +102,11 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     1.0    1.0    29.0 	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.     2     1.0    1.0    28.0 	vpmulld	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1.     2     1.0    1.0    30.0 	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2.     2     1.0    1.0    28.0 	vcvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: 3.     2     1.0    1.0    29.0 	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4.     2     1.0    1.0    27.5 	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2.     2     1.0    1.0    27.5 	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: 3.     2     1.0    1.0    28.5 	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4.     2     1.0    1.0    27.0 	vaddps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 5.     1     1.0    1.0    0.0  	vsqrtps	%xmm0, %xmm2
 # CHECK-NEXT: 6.     1     1.0    1.0    17.0 	vaddps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 7.     1     20.0   20.0   0.0  	vsqrtps	%ymm0, %ymm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Fri Mar 30 21:54:32 2018
@@ -1518,8 +1518,8 @@ vzeroupper
 # CHECK-NEXT:  1      7     1.00    *               	vpmulhuw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      2     1.00                    	vpmulhw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      7     1.00    *               	vpmulhw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      2     1.00                    	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      7     1.00    *               	vpmulld	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  3      4     2.00                    	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      9     2.00    *               	vpmulld	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      2     1.00                    	vpmullw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      7     1.00    *               	vpmullw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      2     1.00                    	vpmuludq	%xmm0, %xmm1, %xmm2
@@ -2221,8 +2221,8 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	vpmulhuw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpmulhw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	vpmulhw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	vpmulld	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -     2.50   0.50    -      -      -      -     0.50   0.50   2.00   	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -      -      -     2.50   0.50   1.00    -      -      -     0.50   0.50   2.00   	vpmulld	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpmullw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	vpmullw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	vpmuludq	%xmm0, %xmm1, %xmm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s?rev=328914&r1=328913&r2=328914&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s Fri Mar 30 21:54:32 2018
@@ -247,8 +247,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     	pmovzxwq	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	pmuldq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	pmuldq	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     1.00   	pmulld	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -     1.00    -     1.00    -      -      -      -      -     1.00   	pmulld	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -      -     2.50   0.50    -      -      -      -     0.50   0.50   2.00   	pmulld	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -      -     2.50   0.50   1.00    -      -      -     0.50   0.50   2.00   	pmulld	(%rax), %xmm2
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     	ptest	%xmm0, %xmm1
 # CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -     1.00    -      -      -      -      -      -     	ptest	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     	roundpd	$1, %xmm0, %xmm2




More information about the llvm-commits mailing list