[llvm] 5b8204b - [X86] SandyBridge ymm broadcast loads use port5 + port23
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 07:10:56 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-22T15:10:27+01:00
New Revision: 5b8204b2215f293f69dded5be2774bec7bb1fde0
URL: https://github.com/llvm/llvm-project/commit/5b8204b2215f293f69dded5be2774bec7bb1fde0
DIFF: https://github.com/llvm/llvm-project/commit/5b8204b2215f293f69dded5be2774bec7bb1fde0.diff
LOG: [X86] SandyBridge ymm broadcast loads use port5 + port23
Unlike the per-lane mov*dup broadcast shuffles, the ymm vbroadcastsd/vbroadcastss loads need port5 to splat across lanes.
Found while reviewing an llvm-exegesis capture (and it matches the Agner + uops.info numbers). I can't find any more easy wins from these captures, so that will be it for now.
Added:
Modified:
llvm/lib/Target/X86/X86SchedSandyBridge.td
llvm/test/CodeGen/X86/sqrt-fastmath.ll
llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 821a0cd061bb516..7b33aed6351c1c1 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -882,9 +882,7 @@ def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
-def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm,
- VMOVDDUPYrm,
+def: InstRW<[SBWriteResGroup54], (instrs VMOVDDUPYrm,
VMOVSHDUPYrm,
VMOVSLDUPYrm)>;
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 1c1df175bdb6f50..c0ad1a31c7d8ddf 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -499,9 +499,9 @@ define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
; AVX512-NEXT: vrsqrtps %ymm0, %ymm1
; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; AVX512-NEXT: vfmadd231ps {{.*#+}} ymm2 = (ymm1 * ymm0) + ymm2
-; AVX512-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
-; AVX512-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vmulps %ymm3, %ymm1, %ymm0
; AVX512-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
%sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index aaa39b65b12154a..49db25cb0bdfb1e 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1095,9 +1095,9 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 3 9 1.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 7 1.00 * vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 2 7 1.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 2 7 1.00 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 572.00 248.50 319.00 39.00 367.50 179.50 179.50
+# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1803,9 +1803,9 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastss (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
index 0512e690cc8ad39..d6d157827b31410 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
@@ -1095,9 +1095,9 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 3 9 1.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 7 1.00 * vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 2 7 1.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 2 7 1.00 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 572.00 248.50 319.00 39.00 367.50 179.50 179.50
+# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1803,9 +1803,9 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - - 1.00 - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: - - - - - - 0.50 0.50 vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vbroadcastss (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %ymm0, %ymm1, %ymm2
More information about the llvm-commits
mailing list