[llvm] r328501 - [X86][Btver2] Fix YMM BLENDPD/BLENDPS + UNPCKPD/UNPCKP instructions costs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 26 07:44:24 PDT 2018
Author: rksimon
Date: Mon Mar 26 07:44:24 2018
New Revision: 328501
URL: http://llvm.org/viewvc/llvm-project?rev=328501&view=rev
Log:
[X86][Btver2] Fix YMM BLENDPD/BLENDPS + UNPCKPD/UNPCKP instructions costs
These should match the YMM MOVDUP/ PERMILPD/PERMILPS + SHUFPD/SHUFPS shuffles instead of using the WriteFShuffle defaults.
Modified:
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328501&r1=328500&r2=328501&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon Mar 26 07:44:24 2018
@@ -695,16 +695,20 @@ def JWriteShuffleY: SchedWriteRes<[JFPU0
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
- VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri)>;
+def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
+ VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
+ VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
+ VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
- VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi)>;
+def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
+ VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
+ VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
+ VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=328501&r1=328500&r2=328501&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Mon Mar 26 07:44:24 2018
@@ -544,9 +544,9 @@ define <4 x double> @test_blendpd(<4 x d
;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:1.00]
+; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blendpd:
@@ -607,8 +607,8 @@ define <8 x float> @test_blendps(<8 x fl
;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:1.00]
+; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
+; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00]
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@@ -5013,8 +5013,8 @@ define <4 x double> @test_unpckhpd(<4 x
;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:1.00]
+; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00]
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@@ -5070,8 +5070,8 @@ define <8 x float> @test_unpckhps(<8 x f
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpckhps:
@@ -5130,8 +5130,8 @@ define <4 x double> @test_unpcklpd(<4 x
;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:1.00]
+; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00]
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@@ -5187,8 +5187,8 @@ define <8 x float> @test_unpcklps(<8 x f
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_unpcklps:
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=328501&r1=328500&r2=328501&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Mon Mar 26 07:44:24 2018
@@ -1071,12 +1071,12 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vandps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendpd $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendpd $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vblendpd $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendps $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vblendps $11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vblendps $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
@@ -1676,20 +1676,20 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vucomiss (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpckhpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpckhps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpckhps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpcklpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vunpcklps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vunpcklps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vxorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2
More information about the llvm-commits
mailing list