[llvm] r342728 - [X86][BtVer2] Fix latency and resource cycles of AVX 256-bit zero-idioms.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 21 05:43:07 PDT 2018


Author: adibiagio
Date: Fri Sep 21 05:43:07 2018
New Revision: 342728

URL: http://llvm.org/viewvc/llvm-project?rev=342728&view=rev
Log:
[X86][BtVer2] Fix latency and resource cycles of AVX 256-bit zero-idioms.

This patch introduces a SchedWriteVariant to describe zero-idiom VXORP(S|D)Yrr
and VANDNP(S|D)Yrr.

This is a follow-up to r342555.

On Jaguar, a VXORPSYrr is 2 macro opcodes. Only one opcode is eliminated at
the register-renaming stage. The other opcode has to be executed to set the
upper half of the destination YMM register.
The same applies to VANDNP(S|D)Yrr.

Differential Revision: https://reviews.llvm.org/D52347

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=342728&r1=342727&r2=342728&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Sep 21 05:43:07 2018
@@ -595,6 +595,10 @@ def JWriteZeroLatency : SchedWriteRes<[]
   let Latency = 0;
 }
 
+def JWriteZeroIdiomYmm : SchedWriteRes<[JFPU01, JFPX]> {
+  let NumMicroOps = 2;
+}
+
 // Certain instructions that use the same register for both source
 // operands do not have a real dependency on the previous contents of the
 // register, and thus, do not have to wait before completing. They can be
@@ -619,6 +623,13 @@ def : InstRW<[JWriteFZeroIdiom], (instrs
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;
 
+def JWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroIdiomYmm]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[JWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+                                          VANDNPSYrr, VANDNPDYrr)>;
+
 def JWriteVZeroIdiomLogic : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
     SchedVar<NoSchedPred,                          [WriteVecLogic]>

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=342728&r1=342727&r2=342728&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Fri Sep 21 05:43:07 2018
@@ -5489,10 +5489,10 @@ define void @test_avx256_zero_idioms() {
 ; BTVER2-LABEL: test_avx256_zero_idioms:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT:    vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
-; BTVER2-NEXT:    vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
-; BTVER2-NEXT:    vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
+; BTVER2-NEXT:    vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
+; BTVER2-NEXT:    vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.50]
+; BTVER2-NEXT:    vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.50]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s?rev=342728&r1=342727&r2=342728&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s Fri Sep 21 05:43:07 2018
@@ -35,12 +35,12 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      300
-# CHECK-NEXT: Total Cycles:      306
+# CHECK-NEXT: Total Cycles:      304
 # CHECK-NEXT: Total uOps:        600
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    1.96
-# CHECK-NEXT: IPC:               0.98
+# CHECK-NEXT: uOps Per Cycle:    1.97
+# CHECK-NEXT: IPC:               0.99
 # CHECK-NEXT: Block RThroughput: 3.0
 
 # CHECK:      Instruction Info:
@@ -53,7 +53,7 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  2      3     2.00                        vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT:  2      1     1.00                        vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  2      1     0.50                        vxorps	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  2      1     1.00                        vblendps	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Resources:
@@ -74,27 +74,27 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -     3.00   3.00   3.00   3.00    -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     3.00   2.00   3.00   2.00    -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT:  -      -      -     2.00    -     2.00    -      -      -      -      -      -      -      -     vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -      -      -      -      -      -      -     vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vxorps	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -      -     vblendps	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
+# CHECK-NEXT:                     0123
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeER    . .   vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [0,1]     .DeE-R    . .   vxorps	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,2]     . DeE-R   . .   vblendps	$2, %ymm1, %ymm2, %ymm3
-# CHECK-NEXT: [1,0]     .  D=eeeER. .   vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [1,1]     .   DeE--R. .   vxorps	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [1,2]     .    D=eE-R .   vblendps	$2, %ymm1, %ymm2, %ymm3
-# CHECK-NEXT: [2,0]     .    .DeeeER.   vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [2,1]     .    . D=eER.   vxorps	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [2,2]     .    .  D=eER   vblendps	$2, %ymm1, %ymm2, %ymm3
+# CHECK:      [0,0]     DeeeER    .  .   vaddps	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1]     .DeE-R    .  .   vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2]     . DeE-R   .  .   vblendps	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0]     .  DeeeER .  .   vaddps	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1]     .   DeE-R .  .   vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2]     .    DeE-R.  .   vblendps	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0]     .    .D=eeeER.   vaddps	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1]     .    . DeE--R.   vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2]     .    .  DeE--R   vblendps	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -104,19 +104,19 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.3    1.3    0.0       vaddps	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: 1.     3     1.3    1.3    1.0       vxorps	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 2.     3     1.7    0.3    0.7       vblendps	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 1.     3     1.0    1.0    1.3       vxorps	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2.     3     1.0    0.0    1.3       vblendps	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      [1] Code Region - ZERO-IDIOM-2
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      300
-# CHECK-NEXT: Total Cycles:      306
+# CHECK-NEXT: Total Cycles:      304
 # CHECK-NEXT: Total uOps:        600
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    1.96
-# CHECK-NEXT: IPC:               0.98
+# CHECK-NEXT: uOps Per Cycle:    1.97
+# CHECK-NEXT: IPC:               0.99
 # CHECK-NEXT: Block RThroughput: 3.0
 
 # CHECK:      Instruction Info:
@@ -129,7 +129,7 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  2      3     2.00                        vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT:  2      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  2      1     0.50                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  2      1     1.00                        vblendpd	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Resources:
@@ -150,27 +150,27 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -     3.00   3.00   3.00   3.00    -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     3.00   2.00   3.00   2.00    -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT:  -      -      -     2.00    -     2.00    -      -      -      -      -      -      -      -     vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -      -     vblendpd	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
+# CHECK-NEXT:                     0123
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeER    . .   vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [0,1]     .DeE-R    . .   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,2]     . DeE-R   . .   vblendpd	$2, %ymm1, %ymm2, %ymm3
-# CHECK-NEXT: [1,0]     .  D=eeeER. .   vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [1,1]     .   DeE--R. .   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [1,2]     .    D=eE-R .   vblendpd	$2, %ymm1, %ymm2, %ymm3
-# CHECK-NEXT: [2,0]     .    .DeeeER.   vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [2,1]     .    . D=eER.   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [2,2]     .    .  D=eER   vblendpd	$2, %ymm1, %ymm2, %ymm3
+# CHECK:      [0,0]     DeeeER    .  .   vaddpd	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1]     .DeE-R    .  .   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2]     . DeE-R   .  .   vblendpd	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0]     .  DeeeER .  .   vaddpd	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1]     .   DeE-R .  .   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2]     .    DeE-R.  .   vblendpd	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0]     .    .D=eeeER.   vaddpd	%ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1]     .    . DeE--R.   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2]     .    .  DeE--R   vblendpd	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -180,8 +180,8 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.3    1.3    0.0       vaddpd	%ymm0, %ymm0, %ymm1
-# CHECK-NEXT: 1.     3     1.3    1.3    1.0       vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 2.     3     1.7    0.3    0.7       vblendpd	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: 1.     3     1.0    1.0    1.3       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2.     3     1.0    0.0    1.3       vblendpd	$2, %ymm1, %ymm2, %ymm3
 
 # CHECK:      [2] Code Region - ZERO-IDIOM-3
 
@@ -205,7 +205,7 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  2      3     2.00                        vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      1     1.00                        vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:  2      1     0.50                        vandnps	%ymm2, %ymm2, %ymm3
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - JALU0
@@ -225,12 +225,12 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -     2.00   2.00   2.00   2.00    -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     2.00   1.00   2.00   1.00    -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT:  -      -      -     2.00    -     2.00    -      -      -      -      -      -      -      -     vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm3
 
 # CHECK:      Timeline view:
 # CHECK-NEXT: Index     0123456789
@@ -274,7 +274,7 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  2      3     2.00                        vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      1     1.00                        vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:  2      1     0.50                        vandnps	%ymm2, %ymm2, %ymm3
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - JALU0
@@ -294,12 +294,12 @@ vandnps %ymm2, %ymm2, %ymm3
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT:  -      -      -     2.00   2.00   2.00   2.00    -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     2.00   1.00   2.00   1.00    -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT:  -      -      -     2.00    -     2.00    -      -      -      -      -      -      -      -     vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -     2.00    -     2.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm3
 
 # CHECK:      Timeline view:
 # CHECK-NEXT: Index     0123456789




More information about the llvm-commits mailing list