[llvm] 5b8204b - [X86] SandyBridge ymm broadcast loads use port5 + port23

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 07:10:56 PDT 2023


Author: Simon Pilgrim
Date: 2023-09-22T15:10:27+01:00
New Revision: 5b8204b2215f293f69dded5be2774bec7bb1fde0

URL: https://github.com/llvm/llvm-project/commit/5b8204b2215f293f69dded5be2774bec7bb1fde0
DIFF: https://github.com/llvm/llvm-project/commit/5b8204b2215f293f69dded5be2774bec7bb1fde0.diff

LOG: [X86] SandyBridge ymm broadcast loads use port5 + port23

Unlike the per-lane mov*dup broadcast shuffles, the ymm vbroadcastsd/vbroadcastss loads need port5 to splat across lanes.

Found while reviewing an llvm-exegesis capture (and it matches the Agner + uops.info numbers). I can't find any more easy wins from these captures, so that will be it for now.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedSandyBridge.td
    llvm/test/CodeGen/X86/sqrt-fastmath.ll
    llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 821a0cd061bb516..7b33aed6351c1c1 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -882,9 +882,7 @@ def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
   let NumMicroOps = 1;
   let ReleaseAtCycles = [1];
 }
-def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
-                                         VBROADCASTSSYrm,
-                                         VMOVDDUPYrm,
+def: InstRW<[SBWriteResGroup54], (instrs VMOVDDUPYrm,
                                          VMOVSHDUPYrm,
                                          VMOVSLDUPYrm)>;
 

diff  --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 1c1df175bdb6f50..c0ad1a31c7d8ddf 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -499,9 +499,9 @@ define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
 ; AVX512-NEXT:    vrsqrtps %ymm0, %ymm1
 ; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
 ; AVX512-NEXT:    vfmadd231ps {{.*#+}} ymm2 = (ymm1 * ymm0) + ymm2
-; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm0 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; AVX512-NEXT:    vmulps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vmulps %ymm3, %ymm1, %ymm0
 ; AVX512-NEXT:    vmulps %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index aaa39b65b12154a..49db25cb0bdfb1e 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1095,9 +1095,9 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vblendvps	%ymm3, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  3      9     1.00    *                   vblendvps	%ymm3, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      7     1.00    *                   vbroadcastf128	(%rax), %ymm2
-# CHECK-NEXT:  1      7     0.50    *                   vbroadcastsd	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00    *                   vbroadcastsd	(%rax), %ymm2
 # CHECK-NEXT:  1      6     0.50    *                   vbroadcastss	(%rax), %xmm2
-# CHECK-NEXT:  1      7     0.50    *                   vbroadcastss	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00    *                   vbroadcastss	(%rax), %ymm2
 # CHECK-NEXT:  1      3     1.00                        vcmpeqpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vcmpeqpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcmpeqpd	%ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  367.50 179.50 179.50
+# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  369.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1803,9 +1803,9 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vblendvps	%ymm3, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vblendvps	%ymm3, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf128	(%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastsd	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastsd	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastss	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastss	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastss	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcmpeqpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcmpeqpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcmpeqpd	%ymm0, %ymm1, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
index 0512e690cc8ad39..d6d157827b31410 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
@@ -1095,9 +1095,9 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vblendvps	%ymm3, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  3      9     1.00    *                   vblendvps	%ymm3, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      7     1.00    *                   vbroadcastf128	(%rax), %ymm2
-# CHECK-NEXT:  1      7     0.50    *                   vbroadcastsd	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00    *                   vbroadcastsd	(%rax), %ymm2
 # CHECK-NEXT:  1      6     0.50    *                   vbroadcastss	(%rax), %xmm2
-# CHECK-NEXT:  1      7     0.50    *                   vbroadcastss	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00    *                   vbroadcastss	(%rax), %ymm2
 # CHECK-NEXT:  1      3     1.00                        vcmpeqpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vcmpeqpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcmpeqpd	%ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  367.50 179.50 179.50
+# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  369.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1803,9 +1803,9 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     vblendvps	%ymm3, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vblendvps	%ymm3, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf128	(%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastsd	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastsd	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastss	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vbroadcastss	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastss	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcmpeqpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vcmpeqpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vcmpeqpd	%ymm0, %ymm1, %ymm2


        


More information about the llvm-commits mailing list