[llvm] r327682 - [X86][Btver2] Fix ymm div/sqrt to use fmul unit
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 15 16:00:47 PDT 2018
Author: rksimon
Date: Thu Mar 15 16:00:47 2018
New Revision: 327682
URL: http://llvm.org/viewvc/llvm-project?rev=327682&view=rev
Log:
[X86][Btver2] Fix ymm div/sqrt to use fmul unit
YMM FDiv/FSqrt are dispatched on pipe JFPU1, but should be performed on the JFPM unit, which is where most of the cycles are spent.
This matches the pipes used by the WriteFSqrt/WriteFDiv definitions.
Modified:
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=327682&r1=327681&r2=327682&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu Mar 15 16:00:47 2018
@@ -488,15 +488,15 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd
VSUBPDYrm, VSUBPSYrm,
VADDSUBPDYrm, VADDSUBPSYrm)>;
-def JWriteFDivY: SchedWriteRes<[JFPU1]> {
+def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 38;
- let ResourceCycles = [38];
+ let ResourceCycles = [1, 38];
}
def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>;
-def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 43;
- let ResourceCycles = [1, 38];
+ let ResourceCycles = [1, 1, 38];
}
def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>;
@@ -752,27 +752,27 @@ def JWriteVTESTLd: SchedWriteRes<[JLAGU,
}
def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>;
-def JWriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
+def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 54;
- let ResourceCycles = [54];
+ let ResourceCycles = [1, 54];
}
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
-def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 59;
- let ResourceCycles = [1, 54];
+ let ResourceCycles = [1, 1, 54];
}
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
-def JWriteVSQRTYPS: SchedWriteRes<[JFPU1]> {
+def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 42;
- let ResourceCycles = [42];
+ let ResourceCycles = [1, 42];
}
def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
-def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 47;
- let ResourceCycles = [1, 42];
+ let ResourceCycles = [1, 1, 42];
}
def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=327682&r1=327681&r2=327682&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Thu Mar 15 16:00:47 2018
@@ -19,9 +19,9 @@ vsqrtps %ymm0, %ymm1
# CHECK: Iterations: 70
# CHECK-NEXT: Instructions: 560
-# CHECK-NEXT: Total Cycles: 3155
+# CHECK-NEXT: Total Cycles: 4484
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: IPC: 0.12
# CHECK: Instruction Info:
@@ -61,8 +61,8 @@ vsqrtps %ymm0, %ymm1
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - - 21.00 5.00 45.00 - - - - - - 1.00
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - - 63.00 5.00 4.00 - - - - - - 1.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -73,28 +73,27 @@ vsqrtps %ymm0, %ymm1
# CHECK-NEXT: - - - - - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - vsqrtps %xmm0, %xmm1
# CHECK-NEXT: - - - - - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - 42.00 - - - - - - - vsqrtps %ymm0, %ymm1
+# CHECK-NEXT: - - - - 42.00 - 1.00 - - - - - - - vsqrtps %ymm0, %ymm1
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123456789 0123456789 0
+# CHECK-NEXT: 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
-# CHECK: [0,0] DeeER. . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,2] .DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm1
-# CHECK-NEXT: [0,3] .D===eeER . . . . . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,4] . D===eeeER . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm1
-# CHECK-NEXT: [0,6] . D====================eeeER . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: [0,7] . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . vsqrtps %ymm0, %ymm1
-
-# CHECK: [1,0] . D=========================================eeER. . . . . vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,1] . D=========================================eE-R. . . . . vpand %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,2] . D=========================================eeeER . . . . vcvttps2dq %xmm0, %xmm1
-# CHECK-NEXT: [1,3] . D============================================eeER . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,4] . .D============================================eeeER . . . vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,5] . .D=========================================eeeeeeeeeeeeeeeeeeeeeER vsqrtps %xmm0, %xmm1
+# CHECK: [0,0] DeeER. . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] .DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm1
+# CHECK-NEXT: [0,3] .D===eeER . . . . . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4] . D===eeeER . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm1
+# CHECK-NEXT: [0,6] . D====================eeeER . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,7] . D====================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . vsqrtps %ymm0, %ymm1
+
+# CHECK: [1,0] . D=============================================================eeER vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] . D=============================================================eE-R vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] . DeeeE-----------------------------------------------------------R vcvttps2dq %xmm0, %xmm1
+# CHECK-NEXT: [1,3] . D===eeE---------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,4] . .D===eeeE-------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
@@ -104,11 +103,11 @@ vsqrtps %ymm0, %ymm1
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 21.5 0.5 0.0 vpmulld %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1. 2 21.5 0.5 1.0 vpand %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2. 2 21.5 21.5 0.0 vcvttps2dq %xmm0, %xmm1
-# CHECK-NEXT: 3. 2 24.5 0.0 0.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4. 2 24.5 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5. 2 21.5 21.5 0.0 vsqrtps %xmm0, %xmm1
+# CHECK-NEXT: 0. 2 31.5 0.5 0.0 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 2 31.5 0.5 1.0 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 1.0 1.0 29.5 vcvttps2dq %xmm0, %xmm1
+# CHECK-NEXT: 3. 2 4.0 0.0 28.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4. 2 4.0 1.0 27.5 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm1
# CHECK-NEXT: 6. 1 21.0 0.0 0.0 vaddps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 7. 1 1.0 1.0 0.0 vsqrtps %ymm0, %ymm1
+# CHECK-NEXT: 7. 1 21.0 21.0 0.0 vsqrtps %ymm0, %ymm1
More information about the llvm-commits mailing list