[llvm] 5c0cb75 - [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 7 07:17:57 PST 2022
Author: Simon Pilgrim
Date: 2022-11-07T15:17:32Z
New Revision: 5c0cb75787b9a8a7fd488fb05b8350dc798aee9a
URL: https://github.com/llvm/llvm-project/commit/5c0cb75787b9a8a7fd488fb05b8350dc798aee9a
DIFF: https://github.com/llvm/llvm-project/commit/5c0cb75787b9a8a7fd488fb05b8350dc798aee9a.diff
LOG: [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake
There can be a difference for MOVDDUPrr, but not for the load-folded broadcast, which runs purely on Port23.
Fixes an old TODO (inherited from the SkylakeServer model, which was fixed in c7662dc3e52801ec824d8473278fb976107d3e57).
Confirmed against Agner Fog's instruction tables and uops.info.
Added:
Modified:
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/lib/Target/X86/X86SchedIceLake.td
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 44fc1acf6b742..bd4cbe2469693 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
+ "(V?)MOVDDUPrm",
"VPBROADCAST(D|Q)rm")>;
def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
@@ -881,13 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
"VPBROADCAST(D|Q)Yrm")>;
-def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>;
-
def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 43fb6eeacc256..331fafa6d2fe3 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1066,13 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
-def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71?
-
def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let Latency = 5;
let NumMicroOps = 2;
@@ -1174,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
VPBROADCASTQrm,
VMOVSHDUPrm,
VMOVSLDUPrm,
+ VMOVDDUPrm,
MOVSHDUPrm,
- MOVSLDUPrm)>;
+ MOVSLDUPrm,
+ MOVDDUPrm)>;
def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
let Latency = 6;
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index ec5c773330c86..ea7d251ffccef 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
# CHECK-NEXT: 1 1 1.00 vmovd %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
index 6832defc50e59..7085718405a44 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
# CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 U monitor
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
index fa0720f4cef57..383ddac8d16d0 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
# CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
index e09b9e0f757b3..4d1942450ec63 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
# CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 U monitor
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2
+# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2
More information about the llvm-commits mailing list