[llvm] 0a6d797 - [X86] Improve F16C CVT schedules on SNB/HSW/BDW
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 09:32:02 PST 2024
Author: Simon Pilgrim
Date: 2024-11-24T17:04:53Z
New Revision: 0a6d797c20f6ab53bc09fb66129f603ed6e4b524
URL: https://github.com/llvm/llvm-project/commit/0a6d797c20f6ab53bc09fb66129f603ed6e4b524
DIFF: https://github.com/llvm/llvm-project/commit/0a6d797c20f6ab53bc09fb66129f603ed6e4b524.diff
LOG: [X86] Improve F16C CVT schedules on SNB/HSW/BDW
Add a complete IvyBridge schedule (IvyBridge was the first to support F16C, and it is covered by the SandyBridge model), and split the rr/rm schedules, as they usually have very different port usage.
On Haswell/Broadwell, the PH2PS conversions (WriteCvtPH2PS*) use Port1, not Port0.
Confirmed with a mixture of Agner + uops.info comparisons.
Added:
Modified:
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/lib/Target/X86/X86SchedSandyBridge.td
llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index e5b3cc4b6c90e6..5b50e1943e3db1 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -393,11 +393,11 @@ defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PS, [BWPort1,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [BWPort1,BWPort5], 2, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort1,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort1,BWPort23], 6, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 59874be34f5a28..d06e8a99370976 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -393,12 +393,12 @@ defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
-defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
-defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PS, [HWPort1,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [HWPort1,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort1,HWPort5], 2, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort1,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort1,HWPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort1,HWPort23], 7, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 6939b1227d0a61..775ad6b1078a53 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -361,16 +361,20 @@ defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
-defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
-defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
-
-defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
-defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
-defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
+// F16C Instructions (IvyBridge+)
+defm : X86WriteRes<WriteCvtPH2PS, [SBPort0,SBPort5], 3, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SBPort0,SBPort5], 3, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPH2PSLd, [SBPort0,SBPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1
+
+defm : X86WriteRes<WriteCvtPS2PH, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
index 9fcd03bfb2fd45..07870d92dac555 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
@@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67
+# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
index 7dea75f8f8fec0..4abcd6fc516b79 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s
@@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
-# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
+# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
-# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
-# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
index 538ecf99074eda..d1fb824fee23db 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s
@@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
+# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
index a2ec86e8724faa..9284810b9e73be 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s
@@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
-# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
@@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
+# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
-# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
-# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
-# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
+# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
More information about the llvm-commits
mailing list