[llvm] 5c0cb75 - [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 7 07:17:57 PST 2022


Author: Simon Pilgrim
Date: 2022-11-07T15:17:32Z
New Revision: 5c0cb75787b9a8a7fd488fb05b8350dc798aee9a

URL: https://github.com/llvm/llvm-project/commit/5c0cb75787b9a8a7fd488fb05b8350dc798aee9a
DIFF: https://github.com/llvm/llvm-project/commit/5c0cb75787b9a8a7fd488fb05b8350dc798aee9a.diff

LOG: [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake

There can be a difference for the register form (MOVDDUPrr), but not for the load-folded form (MOVDDUPrm), which is a broadcast load that executes purely on Port23.

Fixes an old TODO (inherited from the SkylakeServer model, where it was fixed in c7662dc3e52801ec824d8473278fb976107d3e57).

Confirmed against Agner Fog's instruction tables and uops.info.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/lib/Target/X86/X86SchedIceLake.td
    llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
    llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 44fc1acf6b742..bd4cbe2469693 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
 def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
 def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
                                            "(V?)MOVSLDUPrm",
+                                           "(V?)MOVDDUPrm",
                                            "VPBROADCAST(D|Q)rm")>;
 
 def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
@@ -881,13 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
 def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
                                              "VPBROADCAST(D|Q)Yrm")>;
 
-def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>;
-
 def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
   let Latency = 1;
   let NumMicroOps = 2;

diff  --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 43fb6eeacc256..331fafa6d2fe3 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1066,13 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
 }
 def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
 
-def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>;  // TODO: Should this be ICXWriteResGroup71?
-
 def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
   let Latency = 5;
   let NumMicroOps = 2;
@@ -1174,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
                                           VPBROADCASTQrm,
                                           VMOVSHDUPrm,
                                           VMOVSLDUPrm,
+                                          VMOVDDUPrm,
                                           MOVSHDUPrm,
-                                          MOVSLDUPrm)>;
+                                          MOVSLDUPrm,
+                                          MOVDDUPrm)>;
 
 def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
   let Latency = 6;

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index ec5c773330c86..ea7d251ffccef 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vmovd	%xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup	(%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
index 6832defc50e59..7085718405a44 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movshdup	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movsldup	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
index fa0720f4cef57..383ddac8d16d0 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovd	%xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup	%ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup	(%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
index e09b9e0f757b3..4d1942450ec63 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup	(%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movshdup	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movsldup	%xmm0, %xmm2


        


More information about the llvm-commits mailing list