[llvm] r327682 - [X86][Btver2] Fix ymm div/sqrt to use fmul unit

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 15 16:00:47 PDT 2018


Author: rksimon
Date: Thu Mar 15 16:00:47 2018
New Revision: 327682

URL: http://llvm.org/viewvc/llvm-project?rev=327682&view=rev
Log:
[X86][Btver2] Fix ymm div/sqrt to use fmul unit

YMM FDiv/FSqrt are dispatched on pipe JFPU1, but the operation itself is performed on the JFPM unit, which is where most of the cycles are spent. Model this as a single cycle on JFPU1 plus the full operation duration on JFPM.

This matches the pipes used by the WriteFSqrt/WriteFDiv definitions.

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=327682&r1=327681&r2=327682&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu Mar 15 16:00:47 2018
@@ -488,15 +488,15 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd
                                                    VSUBPDYrm, VSUBPSYrm,
                                                    VADDSUBPDYrm, VADDSUBPSYrm)>;
 
-def JWriteFDivY: SchedWriteRes<[JFPU1]> {
+def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 38;
-  let ResourceCycles = [38];
+  let ResourceCycles = [1, 38];
 }
 def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>;
 
-def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 43;
-  let ResourceCycles = [1, 38];
+  let ResourceCycles = [1, 1, 38];
 }
 def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>;
 
@@ -752,27 +752,27 @@ def JWriteVTESTLd: SchedWriteRes<[JLAGU,
 }
 def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>;
 
-def JWriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
+def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 54;
-  let ResourceCycles = [54];
+  let ResourceCycles = [1, 54];
 }
 def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
 
-def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 59;
-  let ResourceCycles = [1, 54];
+  let ResourceCycles = [1, 1, 54];
 }
 def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
 
-def JWriteVSQRTYPS: SchedWriteRes<[JFPU1]> {
+def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 42;
-  let ResourceCycles = [42];
+  let ResourceCycles = [1, 42];
 }
 def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
 
-def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 47;
-  let ResourceCycles = [1, 42];
+  let ResourceCycles = [1, 1, 42];
 }
 def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;
 

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=327682&r1=327681&r2=327682&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Thu Mar 15 16:00:47 2018
@@ -19,9 +19,9 @@ vsqrtps     %ymm0, %ymm1
 
 # CHECK:      Iterations:     70
 # CHECK-NEXT: Instructions:   560
-# CHECK-NEXT: Total Cycles:   3155
+# CHECK-NEXT: Total Cycles:   4484
 # CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.18
+# CHECK-NEXT: IPC:            0.12
 
 
 # CHECK:      Instruction Info:
@@ -61,8 +61,8 @@ vsqrtps     %ymm0, %ymm1
 
 
 # CHECK:      Resource pressure per iteration:
-# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -      -     21.00  5.00   45.00   -      -      -      -      -      -     1.00
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   
+# CHECK-NEXT:  -      -      -      -     63.00  5.00   4.00    -      -      -      -      -      -     1.00   
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions:
@@ -73,28 +73,27 @@ vsqrtps     %ymm0, %ymm1
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -      -     	vaddps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -     21.00   -     1.00    -      -      -      -      -      -      -     	vsqrtps	%xmm0, %xmm1
 # CHECK-NEXT:  -      -      -      -      -     2.00    -      -      -      -      -      -      -      -     	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -     42.00   -      -      -      -      -      -      -     	vsqrtps	%ymm0, %ymm1
+# CHECK-NEXT:  -      -      -      -     42.00   -     1.00    -      -      -      -      -      -      -     	vsqrtps	%ymm0, %ymm1
 
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:      	          0123456789          0123456789          0123456789          0
+# CHECK-NEXT:      	          0123456789          0123456789          0123456789          
 # CHECK-NEXT: Index	0123456789          0123456789          0123456789          0123456789
 
-# CHECK:      [0,0]	DeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,1]	DeE-R.    .    .    .    .    .    .    .    .    .    .    .    .    .	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,2]	.DeeeER   .    .    .    .    .    .    .    .    .    .    .    .    .	vcvttps2dq	%xmm0, %xmm1
-# CHECK-NEXT: [0,3]	.D===eeER .    .    .    .    .    .    .    .    .    .    .    .    .	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,4]	. D===eeeER    .    .    .    .    .    .    .    .    .    .    .    .	vaddps	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [0,5]	. DeeeeeeeeeeeeeeeeeeeeeER    .    .    .    .    .    .    .    .    .	vsqrtps	%xmm0, %xmm1
-# CHECK-NEXT: [0,6]	.  D====================eeeER .    .    .    .    .    .    .    .    .	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: [0,7]	.  DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER  .    .    .    .    .	vsqrtps	%ymm0, %ymm1
-
-# CHECK:      [1,0]	.   D=========================================eeER.    .    .    .    .	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,1]	.   D=========================================eE-R.    .    .    .    .	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,2]	.    D=========================================eeeER   .    .    .    .	vcvttps2dq	%xmm0, %xmm1
-# CHECK-NEXT: [1,3]	.    D============================================eeER .    .    .    .	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,4]	.    .D============================================eeeER    .    .    .	vaddps	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,5]	.    .D=========================================eeeeeeeeeeeeeeeeeeeeeER	vsqrtps	%xmm0, %xmm1
+# CHECK:      [0,0]	DeeER.    .    .    .    .    .    .    .    .    .    .    .    .   .	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1]	DeE-R.    .    .    .    .    .    .    .    .    .    .    .    .   .	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,2]	.DeeeER   .    .    .    .    .    .    .    .    .    .    .    .   .	vcvttps2dq	%xmm0, %xmm1
+# CHECK-NEXT: [0,3]	.D===eeER .    .    .    .    .    .    .    .    .    .    .    .   .	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4]	. D===eeeER    .    .    .    .    .    .    .    .    .    .    .   .	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5]	. DeeeeeeeeeeeeeeeeeeeeeER    .    .    .    .    .    .    .    .   .	vsqrtps	%xmm0, %xmm1
+# CHECK-NEXT: [0,6]	.  D====================eeeER .    .    .    .    .    .    .    .   .	vaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT: [0,7]	.  D====================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER .	vsqrtps	%ymm0, %ymm1
+
+# CHECK:      [1,0]	.   D=============================================================eeER	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1]	.   D=============================================================eE-R	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,2]	.    DeeeE-----------------------------------------------------------R	vcvttps2dq	%xmm0, %xmm1
+# CHECK-NEXT: [1,3]	.    D===eeE---------------------------------------------------------R	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,4]	.    .D===eeeE-------------------------------------------------------R	vaddps	%xmm0, %xmm1, %xmm2
 
 
 # CHECK:      Average Wait times (based on the timeline view):
@@ -104,11 +103,11 @@ vsqrtps     %ymm0, %ymm1
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     21.5   0.5    0.0  	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1.     2     21.5   0.5    1.0  	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2.     2     21.5   21.5   0.0  	vcvttps2dq	%xmm0, %xmm1
-# CHECK-NEXT: 3.     2     24.5   0.0    0.0  	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4.     2     24.5   1.0    0.0  	vaddps	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5.     2     21.5   21.5   0.0  	vsqrtps	%xmm0, %xmm1
+# CHECK-NEXT: 0.     2     31.5   0.5    0.0  	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1.     2     31.5   0.5    1.0  	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2.     2     1.0    1.0    29.5 	vcvttps2dq	%xmm0, %xmm1
+# CHECK-NEXT: 3.     2     4.0    0.0    28.5 	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4.     2     4.0    1.0    27.5 	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5.     1     1.0    1.0    0.0  	vsqrtps	%xmm0, %xmm1
 # CHECK-NEXT: 6.     1     21.0   0.0    0.0  	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 7.     1     1.0    1.0    0.0  	vsqrtps	%ymm0, %ymm1
+# CHECK-NEXT: 7.     1     21.0   21.0   0.0  	vsqrtps	%ymm0, %ymm1




More information about the llvm-commits mailing list