[llvm] r333271 - [X86][SNB] Fix differences between vex/non-vex XMM vector moves (PR37286)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 25 05:18:11 PDT 2018
Author: rksimon
Date: Fri May 25 05:18:11 2018
New Revision: 333271
URL: http://llvm.org/viewvc/llvm-project?rev=333271&view=rev
Log:
[X86][SNB] Fix differences between vex/non-vex XMM vector moves (PR37286)
As confirmed by llvm-exegesis, there is no scheduler difference between MOVDQA/MOVDQU and VMOVDQA/VMOVDQU xmm reg-reg moves.
Another chapter in the never-ending crusade to remove useless InstRW overrides from the x86 scheduler models...
Modified:
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri May 25 05:18:11 2018
@@ -288,7 +288,7 @@ defm : X86WriteRes<WriteVecStoreNTY,
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMoveX, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;
@@ -526,14 +526,6 @@ def: InstRW<[SBWriteResGroup5], (instreg
"MMX_PALIGNRrri",
"MMX_PSIGN(B|D|W)rr")>;
-def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr",
- "MOVDQ(A|U)rr")>; // NOTE: Different port requirements to VEX equivalents
-
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 2;
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri May 25 05:18:11 2018
@@ -3386,7 +3386,7 @@ define <4 x i32> @test_pmaskmovd(i8* %a0
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaskmovd:
@@ -3484,7 +3484,7 @@ define <2 x i64> @test_pmaskmovq(i8* %a0
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaskmovq:
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Fri May 25 05:18:11 2018
@@ -4536,7 +4536,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mask0:
@@ -4573,7 +4573,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mask1:
@@ -4610,7 +4610,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mask2:
@@ -4660,7 +4660,7 @@ define <16 x i8> @test_masked_16xi8_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_16xi8_perm_mask3:
@@ -5658,7 +5658,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mask0:
@@ -5695,7 +5695,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mask1:
@@ -5732,7 +5732,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mask2:
@@ -5782,7 +5782,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mask3:
@@ -5819,7 +5819,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mask4:
@@ -5856,7 +5856,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mask5:
@@ -5906,7 +5906,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mask6:
@@ -5943,7 +5943,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mask7:
@@ -7705,7 +7705,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask0:
@@ -7742,7 +7742,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask1:
@@ -7779,7 +7779,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask2:
@@ -7829,7 +7829,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mask3:
Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s Fri May 25 05:18:11 2018
@@ -1260,13 +1260,13 @@ vzeroupper
# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * vmovdqa (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovdqa %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovdqa %ymm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * vmovdqa (%rax), %ymm2
-# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm0, %xmm2
+# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * vmovdqu (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovdqu %ymm0, %ymm2
@@ -1714,7 +1714,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 572.00 225.50 307.00 39.00 354.50 177.50 177.50
+# CHECK-NEXT: - 572.00 225.17 307.67 39.00 354.17 177.50 177.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1960,13 +1960,13 @@ vzeroupper
# CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - vmovddup %ymm0, %ymm2
# CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %ymm2
-# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa %ymm0, %ymm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %ymm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %ymm2
-# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu %ymm0, %ymm2
More information about the llvm-commits mailing list