[llvm] r357169 - [X86] AMD Piledriver (BdVer2): fine-tune some latencies
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 06:40:35 PDT 2019
Author: lebedevri
Date: Thu Mar 28 06:40:34 2019
New Revision: 357169
URL: http://llvm.org/viewvc/llvm-project?rev=357169&view=rev
Log:
[X86] AMD Piledriver (BdVer2): fine-tune some latencies
Based on llvm-exegesis measurements.
Now that llvm-exegesis is ~2 orders of magnitude faster, and is a bit smarter,
it is possible to continue the cleanup of the scheduler model.
With this, there are no more latency inconsistencies for the
opcodes that produce stable measurements, and only a few inconsistencies
for unstable measurements (MMX_* opcodes, and opcodes that llvm-exegesis
measures by chaining: CMP, TEST, BT, SETcc, CVT, MOV, etc.).
Modified:
llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td Thu Mar 28 06:40:34 2019
@@ -386,14 +386,8 @@ def PdWriteCMPXCHG16B : SchedWriteRes<[P
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
-def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
-
def PdWriteXADD : SchedWriteRes<[PdEX1]> {
- let Latency = 2;
+ let Latency = 1;
let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
@@ -426,7 +420,7 @@ defm : PdWriteResExPair<WriteIDiv16, [P
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
-defm : PdWriteResExPair<WriteCRC32, [PdEX01], 3, [4], 3>;
+defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;
def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
let Latency = 5;
@@ -547,11 +541,17 @@ def PdWriteRCR16ri : SchedWriteRes<[PdEX
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
-def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+def PdWriteRCL32rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
let NumMicroOps = 17;
}
-def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+def : InstRW<[PdWriteRCL32rCL], (instrs RCL32rCL)>;
+
+def PdWriteRCL64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 8;
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteRCL64rCL], (instrs RCL64rCL)>;
def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
@@ -597,8 +597,8 @@ def PdWriteRCL8ri : SchedWriteRes<[PdEX0
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
// SHLD/SHRD.
-defm : PdWriteRes<WriteSHDrri, [PdEX01], 4, [6], 6>;
-defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 4, [8], 7>;
+defm : PdWriteRes<WriteSHDrri, [PdEX01], 3, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;
def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
let Latency = 3;
@@ -608,7 +608,7 @@ def PdWriteSHLD32rri8SHRD16rri8 : SchedW
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;
def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
- let Latency = 4;
+ let Latency = 3;
let ResourceCycles = [8];
let NumMicroOps = 7;
}
@@ -715,7 +715,7 @@ defm : PdWriteResYMMPair<WriteDPPSY,
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
- let Latency = 25;
+ let Latency = 27;
let ResourceCycles = [1, 3];
let NumMicroOps = 17;
}
@@ -875,11 +875,11 @@ defm : X86WriteResPairUnsupported<WriteC
defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU1, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..
-def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 13;
let NumMicroOps = 2;
}
-def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
@@ -952,11 +952,20 @@ defm : PdWriteRes<WriteVecMaskedStore,
defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
-defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 10>;
-defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 10, [], 2>;
+def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+}
+def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;
+
+def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 4;
+}
+def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;
+
+defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
+defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [], 2>;
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2>;
@@ -982,17 +991,24 @@ defm : PdWriteResXMMPair<WritePMULLD,
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
-def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
let Latency = 4;
let ResourceCycles = [2, 1, 2, 1];
}
-def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
VPMACSSDQLrr)>;
defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
+def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+ let NumMicroOps = 9;
+}
+def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;
+
defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
@@ -1010,6 +1026,12 @@ defm : PdWriteResXMMPair<WriteVarShuffle
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 4];
+}
+def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
+
defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
@@ -1041,7 +1063,7 @@ defm : X86WriteResPairUnsupported<WriteV
defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
-defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
@@ -1053,19 +1075,19 @@ def : InstRW<[PdWriteEXTRQ], (instrs EXT
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
-defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 6, [1, 2, 1], 7, 2>;
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 2, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 2, 1], 7, 2>;
-defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 2, 6, 4, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
-defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
@@ -1113,7 +1135,7 @@ def : InstRW<[WritePHAdd.Folded], (instr
defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
- let Latency = 13;
+ let Latency = 12;
let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s Thu Mar 28 06:40:34 2019
@@ -141,12 +141,12 @@ movq %rcx, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total Cycles: 515
# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.98
-# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: uOps Per Cycle: 1.94
+# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -158,7 +158,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm0
+# CHECK-NEXT: 2 13 1.00 cvtsi2ssl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -197,12 +197,12 @@ movq %rcx, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total Cycles: 515
# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.98
-# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: uOps Per Cycle: 1.94
+# CHECK-NEXT: IPC: 0.97
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -214,7 +214,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm0
+# CHECK-NEXT: 2 13 1.00 cvtsi2sdl %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -253,12 +253,12 @@ movq %rcx, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 262
+# CHECK-NEXT: Total Cycles: 263
# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 3.82
-# CHECK-NEXT: IPC: 1.91
+# CHECK-NEXT: uOps Per Cycle: 3.80
+# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 0.5
# CHECK: Instruction Info:
@@ -270,7 +270,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 10 0.50 movd %ecx, %xmm0
+# CHECK-NEXT: 2 11 0.50 movd %ecx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
@@ -309,12 +309,12 @@ movq %rcx, %xmm0
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 262
+# CHECK-NEXT: Total Cycles: 263
# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 3.82
-# CHECK-NEXT: IPC: 1.91
+# CHECK-NEXT: uOps Per Cycle: 3.80
+# CHECK-NEXT: IPC: 1.90
# CHECK-NEXT: Block RThroughput: 0.5
# CHECK: Instruction Info:
@@ -326,7 +326,7 @@ movq %rcx, %xmm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 10 0.50 movq %rcx, %xmm0
+# CHECK-NEXT: 2 11 0.50 movq %rcx, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s Thu Mar 28 06:40:34 2019
@@ -39,7 +39,7 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: 1 5 2.00 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vcvttps2dq %xmm0, %xmm2
-# CHECK-NEXT: 6 13 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 6 12 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 10.50 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 5 2.00 vaddps %ymm0, %ymm1, %ymm2
@@ -92,15 +92,15 @@ vsqrtps %ymm0, %ymm2
# CHECK: [0,0] DeeeeeER . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,1] D=eeE--R . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,2] D==eeeeER . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2
-# CHECK-NEXT: [0,3] .D=eeeeeeeeeeeeeER . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,4] . D=eeeeeE-------R . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,5] . D=eeeeeeeeeE---R . . . . . . . . . vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: [0,6] . D=eeeeeE------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: [0,7] . D==eeeeeeeeeE--R . . . . . . . . . vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: [1,0] . D===eeeeeE----R . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,1] . DeeE----------R . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,2] . D====eeeeE----R . . . . . . . . . vcvttps2dq %xmm0, %xmm2
-# CHECK-NEXT: [1,3] . D=eeeeeeeeeeeeeER . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,3] .D=eeeeeeeeeeeeER . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4] . D=eeeeeE------R . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5] . D=eeeeeeeeeE--R . . . . . . . . . vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: [0,6] . D=eeeeeE-----R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,7] . D==eeeeeeeeeE-R . . . . . . . . . vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: [1,0] . D===eeeeeE---R . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] . DeeE---------R . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] . D====eeeeE---R . . . . . . . . . vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: [1,3] . D=eeeeeeeeeeeeER . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,4] . .D==================eeeeeER . . . . . . vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,5] . .D===================eeeeeeeeeER . . . . . vsqrtps %xmm0, %xmm2
# CHECK-NEXT: [1,6] . . D=======================================eeeeeER . . vaddps %ymm0, %ymm1, %ymm2
@@ -113,11 +113,11 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.5 2.5 2.0 vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1. 2 1.5 1.5 6.0 vpand %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2. 2 4.0 4.0 2.0 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 0. 2 2.5 2.5 1.5 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 2 1.5 1.5 5.5 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 4.0 4.0 1.5 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 3. 2 2.0 2.0 0.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4. 2 10.5 10.5 3.5 vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5. 2 11.0 11.0 1.5 vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 6. 2 21.0 21.0 3.0 vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 7. 2 22.0 22.0 1.0 vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 4. 2 10.5 10.5 3.0 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5. 2 11.0 11.0 1.0 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6. 2 21.0 21.0 2.5 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 7. 2 22.0 22.0 0.5 vsqrtps %ymm0, %ymm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s Thu Mar 28 06:40:34 2019
@@ -14,12 +14,12 @@ movdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 27
-# CHECK-NEXT: Total Cycles: 18
+# CHECK-NEXT: Total Cycles: 16
# CHECK-NEXT: Total uOps: 27
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.50
-# CHECK-NEXT: IPC: 1.50
+# CHECK-NEXT: uOps Per Cycle: 1.69
+# CHECK-NEXT: IPC: 1.69
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@@ -38,8 +38,8 @@ movdqu %xmm5, %xmm0
# CHECK-NEXT: 1 1 0.50 movups %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 movapd %xmm2, %xmm3
# CHECK-NEXT: 1 1 0.50 movupd %xmm3, %xmm4
-# CHECK-NEXT: 1 2 0.50 movdqa %xmm4, %xmm5
-# CHECK-NEXT: 1 2 0.50 movdqu %xmm5, %xmm0
+# CHECK-NEXT: 1 1 0.50 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.50 movdqu %xmm5, %xmm0
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 21
@@ -87,46 +87,46 @@ movdqu %xmm5, %xmm0
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - pxor %mm0, %mm0
-# CHECK-NEXT: - - - - - - - - - - 0.67 0.33 - - - 1.00 - - - - - - - movq %mm0, %mm1
+# CHECK-NEXT: - - - - - - - - - - 0.33 0.67 - - - 1.00 - - - - - - - movq %mm0, %mm1
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - xorps %xmm0, %xmm0
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.33 0.67 - - - - - - - movaps %xmm0, %xmm1
-# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.33 0.67 - - - - - - - movups %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 1.00 - - - - - - - - movapd %xmm2, %xmm3
-# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.33 0.67 - - - - - - - movupd %xmm3, %xmm4
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - 1.00 - - - - - - - movups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.67 0.33 - - - - - - - movapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.67 0.33 - - - - - - - movupd %xmm3, %xmm4
# CHECK-NEXT: - - - - - - - - - - 0.67 0.33 - - 1.00 - - - - - - - - movdqa %xmm4, %xmm5
-# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.33 0.67 - - - - - - - movdqu %xmm5, %xmm0
+# CHECK-NEXT: - - - - - - - - - - 0.33 0.67 - - 0.67 0.33 - - - - - - - movdqu %xmm5, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 01234567
+# CHECK-NEXT: 012345
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DR . . . . pxor %mm0, %mm0
-# CHECK-NEXT: [0,1] DeeER. . . . movq %mm0, %mm1
-# CHECK-NEXT: [0,2] D---R. . . . xorps %xmm0, %xmm0
-# CHECK-NEXT: [0,3] DeE-R. . . . movaps %xmm0, %xmm1
-# CHECK-NEXT: [0,4] .DeER. . . . movups %xmm1, %xmm2
-# CHECK-NEXT: [0,5] .D=eER . . . movapd %xmm2, %xmm3
-# CHECK-NEXT: [0,6] .D==eER . . . movupd %xmm3, %xmm4
-# CHECK-NEXT: [0,7] .D===eeER . . . movdqa %xmm4, %xmm5
-# CHECK-NEXT: [0,8] . D====eeER . . movdqu %xmm5, %xmm0
-# CHECK-NEXT: [1,0] . D-------R . . pxor %mm0, %mm0
-# CHECK-NEXT: [1,1] . DeeE----R . . movq %mm0, %mm1
-# CHECK-NEXT: [1,2] . D-------R . . xorps %xmm0, %xmm0
-# CHECK-NEXT: [1,3] . DeE-----R . . movaps %xmm0, %xmm1
-# CHECK-NEXT: [1,4] . D=eE----R . . movups %xmm1, %xmm2
-# CHECK-NEXT: [1,5] . D==eE---R . . movapd %xmm2, %xmm3
-# CHECK-NEXT: [1,6] . D===eE--R . . movupd %xmm3, %xmm4
-# CHECK-NEXT: [1,7] . D===eeE-R . . movdqa %xmm4, %xmm5
-# CHECK-NEXT: [1,8] . D=====eeER . . movdqu %xmm5, %xmm0
-# CHECK-NEXT: [2,0] . D--------R . . pxor %mm0, %mm0
-# CHECK-NEXT: [2,1] . D=eeE----R . . movq %mm0, %mm1
-# CHECK-NEXT: [2,2] . D-------R . . xorps %xmm0, %xmm0
-# CHECK-NEXT: [2,3] . D==eE----R. . movaps %xmm0, %xmm1
-# CHECK-NEXT: [2,4] . D===eE---R. . movups %xmm1, %xmm2
-# CHECK-NEXT: [2,5] . D====eE--R. . movapd %xmm2, %xmm3
-# CHECK-NEXT: [2,6] . .D====eE-R. . movupd %xmm3, %xmm4
-# CHECK-NEXT: [2,7] . .D=====eeER . movdqa %xmm4, %xmm5
-# CHECK-NEXT: [2,8] . .D=======eeER movdqu %xmm5, %xmm0
+# CHECK: [0,0] DR . . . pxor %mm0, %mm0
+# CHECK-NEXT: [0,1] DeeER. . . movq %mm0, %mm1
+# CHECK-NEXT: [0,2] D---R. . . xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,3] DeE-R. . . movaps %xmm0, %xmm1
+# CHECK-NEXT: [0,4] .DeER. . . movups %xmm1, %xmm2
+# CHECK-NEXT: [0,5] .D=eER . . movapd %xmm2, %xmm3
+# CHECK-NEXT: [0,6] .D==eER . . movupd %xmm3, %xmm4
+# CHECK-NEXT: [0,7] .D===eER . . movdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,8] . D===eER . . movdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] . D-----R . . pxor %mm0, %mm0
+# CHECK-NEXT: [1,1] . DeeE--R . . movq %mm0, %mm1
+# CHECK-NEXT: [1,2] . D-----R . . xorps %xmm0, %xmm0
+# CHECK-NEXT: [1,3] . DeE---R. . movaps %xmm0, %xmm1
+# CHECK-NEXT: [1,4] . D=eE--R. . movups %xmm1, %xmm2
+# CHECK-NEXT: [1,5] . D==eE-R. . movapd %xmm2, %xmm3
+# CHECK-NEXT: [1,6] . D===eER. . movupd %xmm3, %xmm4
+# CHECK-NEXT: [1,7] . D===eER . movdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,8] . D====eER . movdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . D------R . pxor %mm0, %mm0
+# CHECK-NEXT: [2,1] . D==eeE-R . movq %mm0, %mm1
+# CHECK-NEXT: [2,2] . D-----R . xorps %xmm0, %xmm0
+# CHECK-NEXT: [2,3] . D==eE--R . movaps %xmm0, %xmm1
+# CHECK-NEXT: [2,4] . D===eE-R . movups %xmm1, %xmm2
+# CHECK-NEXT: [2,5] . D====eER . movapd %xmm2, %xmm3
+# CHECK-NEXT: [2,6] . .D====eER . movupd %xmm3, %xmm4
+# CHECK-NEXT: [2,7] . .D=====eER. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,8] . .D======eER movdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -135,12 +135,12 @@ movdqu %xmm5, %xmm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 5.0 pxor %mm0, %mm0
-# CHECK-NEXT: 1. 3 1.3 1.3 2.7 movq %mm0, %mm1
-# CHECK-NEXT: 2. 3 0.0 0.0 5.7 xorps %xmm0, %xmm0
-# CHECK-NEXT: 3. 3 1.7 1.7 3.3 movaps %xmm0, %xmm1
-# CHECK-NEXT: 4. 3 2.3 0.0 2.3 movups %xmm1, %xmm2
-# CHECK-NEXT: 5. 3 3.3 0.0 1.7 movapd %xmm2, %xmm3
-# CHECK-NEXT: 6. 3 4.0 0.0 1.0 movupd %xmm3, %xmm4
-# CHECK-NEXT: 7. 3 4.7 0.0 0.3 movdqa %xmm4, %xmm5
-# CHECK-NEXT: 8. 3 6.3 0.0 0.0 movdqu %xmm5, %xmm0
+# CHECK-NEXT: 0. 3 0.0 0.0 3.7 pxor %mm0, %mm0
+# CHECK-NEXT: 1. 3 1.7 1.7 1.0 movq %mm0, %mm1
+# CHECK-NEXT: 2. 3 0.0 0.0 4.3 xorps %xmm0, %xmm0
+# CHECK-NEXT: 3. 3 1.7 1.7 2.0 movaps %xmm0, %xmm1
+# CHECK-NEXT: 4. 3 2.3 0.0 1.0 movups %xmm1, %xmm2
+# CHECK-NEXT: 5. 3 3.3 0.0 0.3 movapd %xmm2, %xmm3
+# CHECK-NEXT: 6. 3 4.0 0.0 0.0 movupd %xmm3, %xmm4
+# CHECK-NEXT: 7. 3 4.7 0.0 0.0 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 8. 3 5.3 0.0 0.0 movdqu %xmm5, %xmm0
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s Thu Mar 28 06:40:34 2019
@@ -11,12 +11,12 @@ vmovdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 21
-# CHECK-NEXT: Total Cycles: 17
+# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 21
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.24
-# CHECK-NEXT: IPC: 1.24
+# CHECK-NEXT: uOps Per Cycle: 1.40
+# CHECK-NEXT: IPC: 1.40
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
@@ -33,8 +33,8 @@ vmovdqu %xmm5, %xmm0
# CHECK-NEXT: 1 1 0.50 vmovups %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vmovapd %xmm2, %xmm3
# CHECK-NEXT: 1 1 0.50 vmovupd %xmm3, %xmm4
-# CHECK-NEXT: 1 2 0.50 vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: 1 2 0.50 vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm5, %xmm0
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 18
@@ -83,37 +83,37 @@ vmovdqu %xmm5, %xmm0
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - - - - - - - 0.33 0.67 - - - - 0.67 0.33 - - - - - - - vmovaps %xmm0, %xmm1
-# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.67 0.33 - - - - - - - vmovups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 1.00 - - - - - - - - vmovups %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.33 0.67 - - - - - - - vmovapd %xmm2, %xmm3
-# CHECK-NEXT: - - - - - - - - 0.67 0.33 - - - - 0.33 0.67 - - - - - - - vmovupd %xmm3, %xmm4
-# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.33 0.67 - - - - - - - vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.67 0.33 - - - - - - - vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: - - - - - - - - 0.67 0.33 - - - - 0.67 0.33 - - - - - - - vmovupd %xmm3, %xmm4
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - 1.00 - - - - - - - vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.33 0.67 - - - - - - - vmovdqu %xmm5, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456
+# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DR . . .. vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [0,1] DeER . . .. vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [0,2] D=eER. . .. vmovups %xmm1, %xmm2
-# CHECK-NEXT: [0,3] D==eER . .. vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [0,4] .D==eER . .. vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [0,5] .D===eeER . .. vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [0,6] .D=====eeER .. vmovdqu %xmm5, %xmm0
-# CHECK-NEXT: [1,0] .D--------R .. vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [1,1] . DeE-----R .. vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [1,2] . D=eE----R .. vmovups %xmm1, %xmm2
-# CHECK-NEXT: [1,3] . D==eE----R .. vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [1,4] . D===eE---R .. vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [1,5] . D===eeE-R .. vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [1,6] . D=====eeER .. vmovdqu %xmm5, %xmm0
-# CHECK-NEXT: [2,0] . D--------R .. vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [2,1] . D==eE----R .. vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [2,2] . D===eE--R .. vmovups %xmm1, %xmm2
-# CHECK-NEXT: [2,3] . D====eE--R .. vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [2,4] . D=====eE-R .. vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [2,5] . D======eeER.. vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [2,6] . D=======eeER vmovdqu %xmm5, %xmm0
+# CHECK: [0,0] DR . . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DeER . . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] D=eER. . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [0,3] D==eER . . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [0,4] .D==eER . . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [0,5] .D===eER . . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,6] .D====eER . . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] .D------R . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] . DeE---R . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] . D=eE--R . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [1,3] . D==eE--R. . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [1,4] . D===eE-R. . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [1,5] . D===eER. . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,6] . D====eER . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . D------R . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . D===eE-R . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . D===eER . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [2,3] . D====eER . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [2,4] . D=====eER . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [2,5] . D======eER. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,6] . D======eER vmovdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -122,10 +122,10 @@ vmovdqu %xmm5, %xmm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 5.3 vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: 1. 3 1.7 1.7 3.0 vmovaps %xmm0, %xmm1
-# CHECK-NEXT: 2. 3 2.7 0.3 2.0 vmovups %xmm1, %xmm2
-# CHECK-NEXT: 3. 3 3.7 0.0 2.0 vmovapd %xmm2, %xmm3
-# CHECK-NEXT: 4. 3 4.3 0.0 1.3 vmovupd %xmm3, %xmm4
-# CHECK-NEXT: 5. 3 5.0 0.0 0.3 vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: 6. 3 6.7 0.0 0.0 vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: 0. 3 0.0 0.0 4.0 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 2.0 2.0 1.3 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 2.7 0.0 0.7 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 3. 3 3.7 0.0 0.7 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 4. 3 4.3 0.0 0.3 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 5. 3 5.0 0.0 0.0 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 6. 3 5.7 0.0 0.0 vmovdqu %xmm5, %xmm0
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s Thu Mar 28 06:40:34 2019
@@ -1188,13 +1188,13 @@ vzeroupper
# CHECK-NEXT: 1 14 9.50 * vdivss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 15 15 1.50 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 17 20 1.50 * vdppd $22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 17 25 1.50 vdpps $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 17 27 1.50 vdpps $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 18 30 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 25 27 3.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 29 32 3.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 0.50 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * vextractf128 $1, %ymm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 vextractps $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 3 11 1.00 vhaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 16 1.00 * vhaddpd (%rax), %xmm1, %xmm2
@@ -1264,21 +1264,21 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vmovaps %ymm0, %ymm2
# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm0, (%rax)
# CHECK-NEXT: 2 5 0.50 * vmovaps (%rax), %ymm2
-# CHECK-NEXT: 2 10 0.50 vmovd %eax, %xmm2
+# CHECK-NEXT: 2 11 0.50 vmovd %eax, %xmm2
# CHECK-NEXT: 1 5 0.50 * vmovd (%rax), %xmm2
-# CHECK-NEXT: 1 10 1.00 vmovd %xmm0, %ecx
+# CHECK-NEXT: 1 11 1.00 vmovd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 2 0.50 vmovddup %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 vmovddup %ymm0, %ymm2
# CHECK-NEXT: 2 7 1.00 * vmovddup (%rax), %ymm2
-# CHECK-NEXT: 1 2 0.50 vmovdqa %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * vmovdqa (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 vmovdqa %ymm0, %ymm2
# CHECK-NEXT: 4 1 1.00 * vmovdqa %ymm0, (%rax)
# CHECK-NEXT: 2 5 0.50 * vmovdqa (%rax), %ymm2
-# CHECK-NEXT: 1 2 0.50 vmovdqu %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * vmovdqu (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 vmovdqu %ymm0, %ymm2
@@ -1294,10 +1294,10 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 * vmovlps %xmm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * vmovlps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 10 1.00 vmovmskpd %xmm0, %ecx
-# CHECK-NEXT: 2 10 1.00 vmovmskpd %ymm0, %ecx
-# CHECK-NEXT: 2 10 1.00 vmovmskps %xmm0, %ecx
-# CHECK-NEXT: 2 10 1.00 vmovmskps %ymm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vmovmskpd %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vmovmskpd %ymm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vmovmskps %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vmovmskps %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 * vmovntdq %xmm0, (%rax)
# CHECK-NEXT: 4 2 2.00 * vmovntdq %ymm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * vmovntdqa (%rax), %xmm2
@@ -1307,9 +1307,9 @@ vzeroupper
# CHECK-NEXT: 1 3 1.00 * vmovntps %xmm0, (%rax)
# CHECK-NEXT: 4 3 2.00 * vmovntps %ymm0, (%rax)
# CHECK-NEXT: 1 2 0.50 vmovq %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 vmovq %rax, %xmm2
+# CHECK-NEXT: 2 11 0.50 vmovq %rax, %xmm2
# CHECK-NEXT: 1 5 0.50 * vmovq (%rax), %xmm2
-# CHECK-NEXT: 1 10 1.00 vmovq %xmm0, %rcx
+# CHECK-NEXT: 1 11 1.00 vmovq %xmm0, %rcx
# CHECK-NEXT: 1 2 1.00 * vmovq %xmm0, (%rax)
# CHECK-NEXT: 1 2 0.50 vmovsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 * vmovsd %xmm0, (%rax)
@@ -1337,7 +1337,7 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vmovups %ymm0, %ymm2
# CHECK-NEXT: 8 1 1.00 * vmovups %ymm0, (%rax)
# CHECK-NEXT: 2 5 0.50 * vmovups (%rax), %ymm2
-# CHECK-NEXT: 9 9 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 9 8 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 9 14 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vmulpd (%rax), %xmm1, %xmm2
@@ -1403,7 +1403,7 @@ vzeroupper
# CHECK-NEXT: 1 7 2.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpblendw $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 6 13 1.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 6 12 1.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 6 17 1.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2
@@ -1413,8 +1413,8 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vpcmpeqq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpcmpeqw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 27 15 4.00 vpcmpestri $1, %xmm0, %xmm2
-# CHECK-NEXT: 28 20 4.50 * vpcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: 27 14 4.00 vpcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: 28 19 4.50 * vpcmpestri $1, (%rax), %xmm2
# CHECK-NEXT: 27 10 4.00 vpcmpestrm $1, %xmm0, %xmm2
# CHECK-NEXT: 28 15 4.50 * vpcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 vpcmpgtb %xmm0, %xmm1, %xmm2
@@ -1425,10 +1425,10 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vpcmpgtq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpcmpgtw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 7 14 1.00 vpcmpistri $1, %xmm0, %xmm2
-# CHECK-NEXT: 8 19 1.00 * vpcmpistri $1, (%rax), %xmm2
-# CHECK-NEXT: 7 6 1.00 vpcmpistrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 9 11 1.00 * vpcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 7 11 1.00 vpcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: 8 16 1.00 * vpcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: 7 7 1.00 vpcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 9 12 1.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 8 4 0.50 vperm2f128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 10 8 0.50 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 0.50 vpermilpd $1, %xmm0, %xmm2
@@ -1447,13 +1447,13 @@ vzeroupper
# CHECK-NEXT: 2 7 1.00 * vpermilps $1, (%rax), %ymm2
# CHECK-NEXT: 2 3 3.00 vpermilps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 3.00 * vpermilps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 13 1.00 vpextrb $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vpextrb $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * vpextrb $1, %xmm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 vpextrd $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vpextrd $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * vpextrd $1, %xmm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 vpextrq $1, %xmm0, %rcx
+# CHECK-NEXT: 2 12 1.00 vpextrq $1, %xmm0, %rcx
# CHECK-NEXT: 2 13 1.00 * vpextrq $1, %xmm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 vpextrw $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * vpextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 3 5 0.50 vphaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 10 0.50 * vphaddd (%rax), %xmm1, %xmm2
@@ -1505,7 +1505,7 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vpminud (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 0.50 vpminuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpminuw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 2 13 1.00 vpmovmskb %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 vpmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * vpmovsxbd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 vpmovsxbq %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s Thu Mar 28 06:40:34 2019
@@ -165,13 +165,13 @@ pxor (%rax), %mm2
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 2 0.50 * * U emms
-# CHECK-NEXT: 2 10 0.50 movd %eax, %mm2
+# CHECK-NEXT: 2 11 0.50 movd %eax, %mm2
# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2
-# CHECK-NEXT: 1 10 1.00 movd %mm0, %ecx
+# CHECK-NEXT: 1 11 1.00 movd %mm0, %ecx
# CHECK-NEXT: 1 2 1.00 * U movd %mm0, (%rax)
-# CHECK-NEXT: 2 10 0.50 movq %rax, %mm2
+# CHECK-NEXT: 2 11 0.50 movq %rax, %mm2
# CHECK-NEXT: 1 5 0.50 * movq (%rax), %mm2
-# CHECK-NEXT: 1 10 1.00 movq %mm0, %rcx
+# CHECK-NEXT: 1 11 1.00 movq %mm0, %rcx
# CHECK-NEXT: 1 2 1.00 * movq %mm0, (%rax)
# CHECK-NEXT: 1 2 0.50 packsswb %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * packsswb (%rax), %mm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s Thu Mar 28 06:40:34 2019
@@ -212,7 +212,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 1 9 1.00 * cvtps2pi (%rax), %mm2
-# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm2
+# CHECK-NEXT: 2 13 1.00 cvtsi2ssl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
@@ -249,7 +249,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 * movlps %xmm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * movlps (%rax), %xmm2
-# CHECK-NEXT: 2 10 1.00 movmskps %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 2 0.50 movss %xmm0, %xmm2
@@ -268,7 +268,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * pavgb (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 pavgw %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pavgw (%rax), %mm2
-# CHECK-NEXT: 2 13 1.00 pextrw $1, %mm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pextrw $1, %mm0, %ecx
# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 pmaxsw %mm0, %mm2
@@ -279,7 +279,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * pminsw (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 pminub %mm0, %mm2
# CHECK-NEXT: 1 7 0.50 * pminub (%rax), %mm2
-# CHECK-NEXT: 2 13 1.00 pmovmskb %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2
# CHECK-NEXT: 1 9 1.00 * pmulhuw (%rax), %mm2
# CHECK-NEXT: 1 5 0.50 * * prefetcht0 (%rax)
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s Thu Mar 28 06:40:34 2019
@@ -444,7 +444,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 18 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm2
+# CHECK-NEXT: 2 13 1.00 cvtsi2sdl %ecx, %xmm2
# CHECK-NEXT: 2 13 1.00 cvtsi2sdq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
@@ -478,32 +478,32 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * movapd (%rax), %xmm2
-# CHECK-NEXT: 2 10 0.50 movd %eax, %xmm2
+# CHECK-NEXT: 2 11 0.50 movd %eax, %xmm2
# CHECK-NEXT: 1 5 0.50 * movd (%rax), %xmm2
-# CHECK-NEXT: 1 10 1.00 movd %xmm0, %ecx
+# CHECK-NEXT: 1 11 1.00 movd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 * movd %xmm0, (%rax)
-# CHECK-NEXT: 1 2 0.50 movdqa %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * movdqa (%rax), %xmm2
-# CHECK-NEXT: 1 2 0.50 movdqu %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.50 movdqu %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * movdqu (%rax), %xmm2
-# CHECK-NEXT: 1 2 0.50 movdq2q %xmm0, %mm2
+# CHECK-NEXT: 1 1 0.50 movdq2q %xmm0, %mm2
# CHECK-NEXT: 2 2 1.00 * movhpd %xmm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * movhpd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 * movlpd %xmm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * movlpd (%rax), %xmm2
-# CHECK-NEXT: 2 10 1.00 movmskpd %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 movmskpd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax)
# CHECK-NEXT: 1 1 1.00 * movntiq %rax, (%rax)
# CHECK-NEXT: 1 2 1.00 * movntdq %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 * movntpd %xmm0, (%rax)
# CHECK-NEXT: 1 2 0.50 movq %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 movq %rax, %xmm2
+# CHECK-NEXT: 2 11 0.50 movq %rax, %xmm2
# CHECK-NEXT: 1 5 0.50 * movq (%rax), %xmm2
-# CHECK-NEXT: 1 10 1.00 movq %xmm0, %rcx
+# CHECK-NEXT: 1 11 1.00 movq %xmm0, %rcx
# CHECK-NEXT: 1 2 1.00 * movq %xmm0, (%rax)
-# CHECK-NEXT: 1 2 0.50 movq2dq %mm0, %xmm2
+# CHECK-NEXT: 1 4 0.50 movq2dq %mm0, %xmm2
# CHECK-NEXT: 1 2 0.50 movsd %xmm0, %xmm2
# CHECK-NEXT: 1 2 1.00 * movsd %xmm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * movsd (%rax), %xmm2
@@ -560,7 +560,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpgtd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpgtw (%rax), %xmm2
-# CHECK-NEXT: 2 13 1.00 pextrw $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pextrw $1, %xmm0, %ecx
# CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %xmm0
# CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %xmm0
# CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2
@@ -573,7 +573,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * pminsw (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pminub (%rax), %xmm2
-# CHECK-NEXT: 2 13 1.00 pmovmskb %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 4 1.00 pmulhuw %xmm0, %xmm2
# CHECK-NEXT: 1 9 1.00 * pmulhuw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulhw %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s Thu Mar 28 06:40:34 2019
@@ -167,7 +167,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 17 20 1.50 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 16 25 1.50 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 18 30 1.50 * dpps $22, (%rax), %xmm2
-# CHECK-NEXT: 2 13 1.00 extractps $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 2 0.50 insertps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * insertps $1, (%rax), %xmm2
@@ -182,11 +182,11 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpeqq (%rax), %xmm2
-# CHECK-NEXT: 2 13 1.00 pextrb $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pextrb $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * pextrb $1, %xmm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 pextrd $1, %xmm0, %ecx
+# CHECK-NEXT: 2 12 1.00 pextrd $1, %xmm0, %ecx
# CHECK-NEXT: 2 13 1.00 * pextrd $1, %xmm0, (%rax)
-# CHECK-NEXT: 2 13 1.00 pextrq $1, %xmm0, %rcx
+# CHECK-NEXT: 2 12 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 2 13 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 2 13 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 2 4 1.00 phminposuw %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s Thu Mar 28 06:40:34 2019
@@ -40,24 +40,24 @@ pcmpgtq (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 3 3 2.00 crc32b %al, %ecx
-# CHECK-NEXT: 3 7 2.00 * crc32b (%rax), %ecx
+# CHECK-NEXT: 3 2 2.00 crc32b %al, %ecx
+# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %ecx
# CHECK-NEXT: 7 6 2.00 crc32l %eax, %ecx
-# CHECK-NEXT: 3 7 2.00 * crc32l (%rax), %ecx
+# CHECK-NEXT: 3 6 2.00 * crc32l (%rax), %ecx
# CHECK-NEXT: 5 5 2.00 crc32w %ax, %ecx
-# CHECK-NEXT: 3 7 2.00 * crc32w (%rax), %ecx
-# CHECK-NEXT: 3 3 2.00 crc32b %al, %rcx
-# CHECK-NEXT: 3 7 2.00 * crc32b (%rax), %rcx
+# CHECK-NEXT: 3 6 2.00 * crc32w (%rax), %ecx
+# CHECK-NEXT: 3 2 2.00 crc32b %al, %rcx
+# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %rcx
# CHECK-NEXT: 11 10 2.00 crc32q %rax, %rcx
-# CHECK-NEXT: 3 7 2.00 * crc32q (%rax), %rcx
-# CHECK-NEXT: 27 15 4.00 pcmpestri $1, %xmm0, %xmm2
-# CHECK-NEXT: 28 20 4.50 * pcmpestri $1, (%rax), %xmm2
+# CHECK-NEXT: 3 6 2.00 * crc32q (%rax), %rcx
+# CHECK-NEXT: 27 14 4.00 pcmpestri $1, %xmm0, %xmm2
+# CHECK-NEXT: 28 19 4.50 * pcmpestri $1, (%rax), %xmm2
# CHECK-NEXT: 27 10 4.00 pcmpestrm $1, %xmm0, %xmm2
# CHECK-NEXT: 28 15 4.50 * pcmpestrm $1, (%rax), %xmm2
-# CHECK-NEXT: 7 14 1.00 pcmpistri $1, %xmm0, %xmm2
-# CHECK-NEXT: 8 19 1.00 * pcmpistri $1, (%rax), %xmm2
-# CHECK-NEXT: 7 6 1.00 pcmpistrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 9 11 1.00 * pcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 7 11 1.00 pcmpistri $1, %xmm0, %xmm2
+# CHECK-NEXT: 8 16 1.00 * pcmpistri $1, (%rax), %xmm2
+# CHECK-NEXT: 7 7 1.00 pcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 9 12 1.00 * pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: 1 7 0.50 * pcmpgtq (%rax), %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s Thu Mar 28 06:40:34 2019
@@ -1333,7 +1333,7 @@ xorq (%rax), %rdi
# CHECK-NEXT: 15 7 0.50 rcrq $7, %rdi
# CHECK-NEXT: 2 5 1.00 * rclq $7, (%rax)
# CHECK-NEXT: 2 5 1.00 * rcrq $7, (%rax)
-# CHECK-NEXT: 17 7 0.50 rclq %cl, %rdi
+# CHECK-NEXT: 17 8 0.50 rclq %cl, %rdi
# CHECK-NEXT: 16 7 0.50 rcrq %cl, %rdi
# CHECK-NEXT: 2 5 1.00 * rclq %cl, (%rax)
# CHECK-NEXT: 2 5 1.00 * rcrq %cl, (%rax)
@@ -1540,28 +1540,28 @@ xorq (%rax), %rdi
# CHECK-NEXT: 2 1 1.00 * setg (%rax)
# CHECK-NEXT: 1 1 0.50 setle %al
# CHECK-NEXT: 2 1 1.00 * setle (%rax)
-# CHECK-NEXT: 7 4 4.00 shldw %cl, %si, %di
-# CHECK-NEXT: 7 4 4.00 shrdw %cl, %si, %di
+# CHECK-NEXT: 7 3 4.00 shldw %cl, %si, %di
+# CHECK-NEXT: 7 3 4.00 shrdw %cl, %si, %di
# CHECK-NEXT: 8 4 11.00 * * shldw %cl, %si, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdw %cl, %si, (%rax)
-# CHECK-NEXT: 6 4 3.00 shldw $7, %si, %di
+# CHECK-NEXT: 6 3 3.00 shldw $7, %si, %di
# CHECK-NEXT: 6 3 3.00 shrdw $7, %si, %di
# CHECK-NEXT: 8 4 11.00 * * shldw $7, %si, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdw $7, %si, (%rax)
-# CHECK-NEXT: 7 4 4.00 shldl %cl, %esi, %edi
-# CHECK-NEXT: 7 4 4.00 shrdl %cl, %esi, %edi
+# CHECK-NEXT: 7 3 4.00 shldl %cl, %esi, %edi
+# CHECK-NEXT: 7 3 4.00 shrdl %cl, %esi, %edi
# CHECK-NEXT: 8 4 11.00 * * shldl %cl, %esi, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdl %cl, %esi, (%rax)
# CHECK-NEXT: 6 3 3.00 shldl $7, %esi, %edi
-# CHECK-NEXT: 6 4 3.00 shrdl $7, %esi, %edi
+# CHECK-NEXT: 6 3 3.00 shrdl $7, %esi, %edi
# CHECK-NEXT: 8 4 11.00 * * shldl $7, %esi, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdl $7, %esi, (%rax)
-# CHECK-NEXT: 7 4 4.00 shldq %cl, %rsi, %rdi
-# CHECK-NEXT: 7 4 4.00 shrdq %cl, %rsi, %rdi
+# CHECK-NEXT: 7 3 4.00 shldq %cl, %rsi, %rdi
+# CHECK-NEXT: 7 3 4.00 shrdq %cl, %rsi, %rdi
# CHECK-NEXT: 8 4 11.00 * * shldq %cl, %rsi, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdq %cl, %rsi, (%rax)
-# CHECK-NEXT: 6 4 3.00 shldq $7, %rsi, %rdi
-# CHECK-NEXT: 6 4 3.00 shrdq $7, %rsi, %rdi
+# CHECK-NEXT: 6 3 3.00 shldq $7, %rsi, %rdi
+# CHECK-NEXT: 6 3 3.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 8 4 11.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 8 4 11.00 * * shrdq $7, %rsi, (%rax)
# CHECK-NEXT: 1 1 0.50 U stc
@@ -1628,18 +1628,18 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 5 0.50 * testq %rsi, (%rax)
# CHECK-NEXT: 1 100 0.50 * U ud2
# CHECK-NEXT: 1 100 0.50 U wrmsr
-# CHECK-NEXT: 4 2 1.00 xaddb %bl, %cl
+# CHECK-NEXT: 4 1 1.00 xaddb %bl, %cl
# CHECK-NEXT: 4 6 1.00 * * xaddb %bl, (%rcx)
-# CHECK-NEXT: 4 2 1.00 xaddw %bx, %cx
+# CHECK-NEXT: 4 1 1.00 xaddw %bx, %cx
# CHECK-NEXT: 4 6 1.00 * * xaddw %ax, (%rbx)
-# CHECK-NEXT: 4 2 1.00 xaddl %ebx, %ecx
+# CHECK-NEXT: 4 1 1.00 xaddl %ebx, %ecx
# CHECK-NEXT: 4 6 1.00 * * xaddl %eax, (%rbx)
-# CHECK-NEXT: 4 2 1.00 xaddq %rbx, %rcx
+# CHECK-NEXT: 4 1 1.00 xaddq %rbx, %rcx
# CHECK-NEXT: 4 6 1.00 * * xaddq %rax, (%rbx)
# CHECK-NEXT: 2 1 1.00 xchgb %bl, %cl
# CHECK-NEXT: 2 5 1.00 * * xchgb %bl, (%rbx)
# CHECK-NEXT: 2 1 1.00 xchgw %bx, %ax
-# CHECK-NEXT: 2 2 1.00 xchgw %bx, %cx
+# CHECK-NEXT: 2 1 1.00 xchgw %bx, %cx
# CHECK-NEXT: 2 5 1.00 * * xchgw %ax, (%rbx)
# CHECK-NEXT: 2 1 1.00 xchgl %ebx, %eax
# CHECK-NEXT: 2 1 1.00 xchgl %ebx, %ecx
Modified: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s?rev=357169&r1=357168&r2=357169&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s Thu Mar 28 06:40:34 2019
@@ -321,7 +321,7 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 9 1.00 * vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 4 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 9 1.00 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
-# CHECK-NEXT: 1 3 2.00 vpperm %xmm0, %xmm1, %xmm2, %xmm3
+# CHECK-NEXT: 1 2 2.00 vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 8 2.00 * vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 8 2.00 * vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 3 0.50 vprotb %xmm0, %xmm1, %xmm3
More information about the llvm-commits mailing list