[llvm] r333271 - [X86][SNB] Fix differences between vex/non-vex XMM vector moves (PR37286)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri May 25 05:18:11 PDT 2018


Author: rksimon
Date: Fri May 25 05:18:11 2018
New Revision: 333271

URL: http://llvm.org/viewvc/llvm-project?rev=333271&view=rev
Log:
[X86][SNB] Fix differences between vex/non-vex XMM vector moves (PR37286)

As confirmed by llvm-exegesis, there is no scheduling difference on Sandy Bridge between the MOVDQA/MOVDQU and VMOVDQA/VMOVDQU XMM register-to-register moves.

Another chapter in the never-ending crusade to remove useless InstRW overrides from the x86 scheduler models...

Modified:
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri May 25 05:18:11 2018
@@ -288,7 +288,7 @@ defm : X86WriteRes<WriteVecStoreNTY,
 defm : X86WriteRes<WriteVecMaskedStore,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
 defm : X86WriteRes<WriteVecMove,         [SBPort05], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMoveX,        [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveX,        [SBPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [SBPort05], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveToGpr,    [SBPort0], 2, [1], 1>;
 defm : X86WriteRes<WriteVecMoveFromGpr,  [SBPort5], 1, [1], 1>;
@@ -526,14 +526,6 @@ def: InstRW<[SBWriteResGroup5], (instreg
                                            "MMX_PALIGNRrri",
                                            "MMX_PSIGN(B|D|W)rr")>;
 
-def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
-  let Latency = 1;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr",
-                                           "MOVDQ(A|U)rr")>; // NOTE: Different port requirements to VEX equivalents
-
 def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
   let Latency = 2;
   let NumMicroOps = 2;

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri May 25 05:18:11 2018
@@ -3386,7 +3386,7 @@ define <4 x i32> @test_pmaskmovd(i8* %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaskmovd:
@@ -3484,7 +3484,7 @@ define <2 x i64> @test_pmaskmovq(i8* %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; GENERIC-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT:    vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaskmovq:

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Fri May 25 05:18:11 2018
@@ -4536,7 +4536,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mask0:
@@ -4573,7 +4573,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mask1:
@@ -4610,7 +4610,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mask2:
@@ -4660,7 +4660,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_16xi8_perm_mask3:
@@ -5658,7 +5658,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mask0:
@@ -5695,7 +5695,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mask1:
@@ -5732,7 +5732,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mask2:
@@ -5782,7 +5782,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mask3:
@@ -5819,7 +5819,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mask4:
@@ -5856,7 +5856,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mask5:
@@ -5906,7 +5906,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mask6:
@@ -5943,7 +5943,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mask7:
@@ -7705,7 +7705,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mask0:
@@ -7742,7 +7742,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mask1:
@@ -7779,7 +7779,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mask2:
@@ -7829,7 +7829,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
-; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mask3:

Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s?rev=333271&r1=333270&r2=333271&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s Fri May 25 05:18:11 2018
@@ -1260,13 +1260,13 @@ vzeroupper
 # CHECK-NEXT:  1      6     0.50    *                   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup	(%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vmovdqa	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vmovdqa	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00           *            vmovdqa	%xmm0, (%rax)
 # CHECK-NEXT:  1      6     0.50    *                   vmovdqa	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vmovdqa	%ymm0, %ymm2
 # CHECK-NEXT:  1      1     1.00           *            vmovdqa	%ymm0, (%rax)
 # CHECK-NEXT:  1      7     0.50    *                   vmovdqa	(%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vmovdqu	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vmovdqu	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00           *            vmovdqu	%xmm0, (%rax)
 # CHECK-NEXT:  1      6     0.50    *                   vmovdqu	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vmovdqu	%ymm0, %ymm2
@@ -1714,7 +1714,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 225.50 307.00 39.00  354.50 177.50 177.50
+# CHECK-NEXT:  -     572.00 225.17 307.67 39.00  354.17 177.50 177.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1960,13 +1960,13 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vmovddup	(%rax), %ymm2
-# CHECK-NEXT:  -      -     0.50    -      -     0.50    -      -     vmovdqa	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     vmovdqa	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vmovdqa	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vmovdqa	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50    -      -     0.50    -      -     vmovdqa	%ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vmovdqa	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vmovdqa	(%rax), %ymm2
-# CHECK-NEXT:  -      -     0.50    -      -     0.50    -      -     vmovdqu	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     vmovdqu	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vmovdqu	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vmovdqu	(%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50    -      -     0.50    -      -     vmovdqu	%ymm0, %ymm2




More information about the llvm-commits mailing list