[llvm] r326597 - [X86][BTVER2] Fix throughput of YMM bitwise instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 2 10:20:35 PST 2018


Author: rksimon
Date: Fri Mar  2 10:20:35 2018
New Revision: 326597

URL: http://llvm.org/viewvc/llvm-project?rev=326597&view=rev
Log:
[X86][BTVER2] Fix throughput of YMM bitwise instructions

These instructions are double-pumped: each is split into two 128-bit ops, which can then pass through either FPU pipe.

Found while testing llvm-mca (D43951)

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=326597&r1=326596&r2=326597&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Mar  2 10:20:35 2018
@@ -508,6 +508,26 @@ def : InstRW<[WriteCVTPH2PSYLd], (instrs
 // AVX instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
+def WriteLogicY: SchedWriteRes<[JFPU01]> {
+  let Latency = 1;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+def : InstRW<[WriteLogicY], (instrs VORPDYrr, VORPSYrr,
+                                    VXORPDYrr, VXORPSYrr,
+                                    VANDPDYrr, VANDPSYrr,
+                                    VANDNPDYrr, VANDNPSYrr)>;
+
+def WriteLogicYLd: SchedWriteRes<[JLAGU, JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 2];
+  let NumMicroOps = 3;
+}
+def : InstRW<[WriteLogicYLd], (instrs VORPDYrm, VORPSYrm,
+                                      VXORPDYrm, VXORPSYrm,
+                                      VANDPDYrm, VANDPSYrm,
+                                      VANDNPDYrm, VANDNPSYrm)>;
+
 def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> {
   let Latency = 12;
   let ResourceCycles = [6, 6];

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=326597&r1=326596&r2=326597&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Fri Mar  2 10:20:35 2018
@@ -272,7 +272,7 @@ define <4 x double> @test_andnotpd(<4 x
 ;
 ; BTVER2-LABEL: test_andnotpd:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -341,7 +341,7 @@ define <8 x float> @test_andnotps(<8 x f
 ;
 ; BTVER2-LABEL: test_andnotps:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -410,7 +410,7 @@ define <4 x double> @test_andpd(<4 x dou
 ;
 ; BTVER2-LABEL: test_andpd:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -477,7 +477,7 @@ define <8 x float> @test_andps(<8 x floa
 ;
 ; BTVER2-LABEL: test_andps:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -965,7 +965,7 @@ define <4 x double> @test_cmppd(<4 x dou
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
 ; BTVER2-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cmppd:
@@ -1031,7 +1031,7 @@ define <8 x float> @test_cmpps(<8 x floa
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
 ; BTVER2-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cmpps:
@@ -1415,7 +1415,7 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
 ; BTVER2-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvtps2dq:
@@ -1479,7 +1479,7 @@ define <8 x i32> @test_cvttps2dq(<8 x fl
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
 ; BTVER2-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvttps2dq:
@@ -3392,7 +3392,7 @@ define <4 x double> @orpd(<4 x double> %
 ;
 ; BTVER2-LABEL: orpd:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3459,7 +3459,7 @@ define <8 x float> @test_orps(<8 x float
 ;
 ; BTVER2-LABEL: test_orps:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -5247,7 +5247,7 @@ define <4 x double> @test_xorpd(<4 x dou
 ;
 ; BTVER2-LABEL: test_xorpd:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -5314,7 +5314,7 @@ define <8 x float> @test_xorps(<8 x floa
 ;
 ; BTVER2-LABEL: test_xorps:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; BTVER2-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]




More information about the llvm-commits mailing list