[llvm] [X86] Swap ports 10 and 11 in SapphireRapids Scheduling Model (PR #117468)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 23 22:53:02 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
Based on intel/perfmon#<!-- -->149, the documentation is incorrect and the pfm counter names are actually correct. This patch adjusts the SapphireRapids scheduling model to match the performance counter naming, which is the correct naming and will soon be reflected in the optimization manual.
This fixes part of #<!-- -->117360.
---
Patch is 2.05 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117468.diff
66 Files Affected:
- (modified) llvm/lib/Target/X86/X86PfmCounters.td (+1-4)
- (modified) llvm/lib/Target/X86/X86SchedSapphireRapids.td (+413-413)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s (+11-11)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s (+7-7)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s (+323-323)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s (+153-153)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s (+593-593)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s (+9-9)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s (+17-17)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s (+222-222)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s (+433-433)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s (+25-25)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s (+49-49)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s (+205-205)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s (+332-332)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s (+16-16)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s (+31-31)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s (+13-13)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s (+25-25)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s (+9-9)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s (+16-16)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s (+67-67)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s (+129-129)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s (+31-31)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s (+960-960)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s (+25-25)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s (+49-49)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s (+13-13)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s (+25-25)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s (+7-7)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s (+9-9)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s (+14-14)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s (+18-18)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s (+49-49)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s (+97-97)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s (+46-46)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s (+47-47)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s (+59-59)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s (+119-119)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s (+11-11)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s (+45-45)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s (+11-11)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s (+33-33)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s (+2-2)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s (+694-694)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s (+43-43)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s (+6-6)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s (+3-3)
``````````diff
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 0c80f1eaadadb8..c15a2adc590f58 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -229,10 +229,7 @@ def SapphireRapidsPfmCounters : ProcPfmCounters {
let IssueCounters = [
PfmIssueCounter<"SPRPort00", "uops_dispatched:port_0">,
PfmIssueCounter<"SPRPort01", "uops_dispatched:port_1">,
- // The perfmon documentation and thus libpfm seems to incorrectly label
- // this performance counter, as ports 2,3, and 11 are actually grouped
- // according to most documentation. See #113941 for additional details.
- PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched:port_2_3_10">,
+ PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched:port_2_3_10">,
PfmIssueCounter<"SPRPort04_09", "uops_dispatched:port_4_9">,
PfmIssueCounter<"SPRPort05_11", "uops_dispatched:port_5_11">,
PfmIssueCounter<"SPRPort06", "uops_dispatched:port_6">,
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 8a23d1b103aa6b..e04ff68d278b2b 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -56,15 +56,15 @@ def SPRPort00_05 : ProcResGroup<[SPRPort00, SPRPort05]>;
def SPRPort00_05_06 : ProcResGroup<[SPRPort00, SPRPort05, SPRPort06]>;
def SPRPort00_06 : ProcResGroup<[SPRPort00, SPRPort06]>;
def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>;
-def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>;
+def SPRPort01_05_11 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort11]>;
def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>;
-def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>;
+def SPRPort02_03_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>;
def SPRPort05_11 : ProcResGroup<[SPRPort05, SPRPort11]>;
def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>;
// EU has 112 reservation stations.
-def SPRPort00_01_05_06_10 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05,
- SPRPort06, SPRPort10]> {
+def SPRPort00_01_05_06_11 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05,
+ SPRPort06, SPRPort11]> {
let BufferSize = 112;
}
@@ -74,8 +74,8 @@ def SPRPort04_09 : ProcResGroup<[SPRPort04, SPRPort09]> {
}
// MEM has 72 reservation stations.
-def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
- SPRPort08, SPRPort11]> {
+def SPRPort02_03_07_08_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
+ SPRPort08, SPRPort10]> {
let BufferSize = 72;
}
@@ -113,7 +113,7 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// Memory variant also uses a cycle on port 2/3/11 and adds LoadLat cycles to
// the latency (default = 5).
- def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_11], ExePorts)> {
+ def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_10], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
@@ -126,71 +126,71 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// Infered SchedWrite definition.
def : WriteRes<WriteADC, [SPRPort00_06]>;
-defm : X86WriteRes<WriteADCLd, [SPRPort00_01_05_06_10, SPRPort00_06], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteADCLd, [SPRPort00_01_05_06_11, SPRPort00_06], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteAESDecEnc, [SPRPort00_01], 5, [1], 1, 7>;
defm : SPRWriteResPair<WriteAESIMC, [SPRPort00_01], 8, [2], 2, 7>;
defm : X86WriteRes<WriteAESKeyGen, [SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort05], 7, [4, 1, 1, 2, 3, 3], 14>;
-defm : X86WriteRes<WriteAESKeyGenLd, [SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort05], 12, [4, 1, 2, 3, 1, 3], 14>;
-def : WriteRes<WriteALU, [SPRPort00_01_05_06_10]>;
-def : WriteRes<WriteALULd, [SPRPort00_01_05_06_10]> {
+defm : X86WriteRes<WriteAESKeyGenLd, [SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort02_03_10, SPRPort05], 12, [4, 1, 2, 3, 1, 3], 14>;
+def : WriteRes<WriteALU, [SPRPort00_01_05_06_11]>;
+def : WriteRes<WriteALULd, [SPRPort00_01_05_06_11]> {
let Latency = 11;
}
defm : SPRWriteResPair<WriteBEXTR, [SPRPort00_06, SPRPort01], 6, [1, 1], 2>;
-defm : SPRWriteResPair<WriteBLS, [SPRPort01_05_10], 2, [1]>;
+defm : SPRWriteResPair<WriteBLS, [SPRPort01_05_11], 2, [1]>;
defm : SPRWriteResPair<WriteBSF, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteBSR, [SPRPort01], 3, [1]>;
def : WriteRes<WriteBSWAP32, [SPRPort01]>;
defm : X86WriteRes<WriteBSWAP64, [SPRPort00_06, SPRPort01], 2, [1, 1], 2>;
defm : SPRWriteResPair<WriteBZHI, [SPRPort01], 3, [1]>;
def : WriteRes<WriteBitTest, [SPRPort01]>;
-defm : X86WriteRes<WriteBitTestImmLd, [SPRPort01, SPRPort02_03_11], 6, [1, 1], 2>;
-defm : X86WriteRes<WriteBitTestRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11], 11, [4, 2, 1, 2, 1], 10>;
+defm : X86WriteRes<WriteBitTestImmLd, [SPRPort01, SPRPort02_03_10], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10], 11, [4, 2, 1, 2, 1], 10>;
def : WriteRes<WriteBitTestSet, [SPRPort01]>;
def : WriteRes<WriteBitTestSetImmLd, [SPRPort01]> {
let Latency = 11;
}
-defm : X86WriteRes<WriteBitTestSetRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10], 17, [3, 2, 1, 2], 8>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11], 17, [3, 2, 1, 2], 8>;
defm : SPRWriteResPair<WriteBlend, [SPRPort01_05], 1, [1], 1, 7>;
defm : SPRWriteResPair<WriteBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
defm : SPRWriteResPair<WriteCLMul, [SPRPort05], 3, [1], 1, 7>;
defm : SPRWriteResPair<WriteCMOV, [SPRPort00_06], 1, [1], 1, 6>;
-defm : X86WriteRes<WriteCMPXCHG, [SPRPort00_01_05_06_10, SPRPort00_06], 3, [3, 2], 5>;
-defm : X86WriteRes<WriteCMPXCHGRMW, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 2, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteCMPXCHG, [SPRPort00_01_05_06_11, SPRPort00_06], 3, [3, 2], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 2, 1, 1, 1], 6>;
defm : SPRWriteResPair<WriteCRC32, [SPRPort01], 3, [1]>;
defm : X86WriteRes<WriteCvtI2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtI2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtI2PDZ, [SPRPort00], 4, [1], 1, 8>;
defm : SPRWriteResPair<WriteCvtI2PS, [SPRPort00_01], 4, [1], 1, 7>;
defm : SPRWriteResPair<WriteCvtI2PSY, [SPRPort00_01], 4, [1], 1, 8>;
defm : SPRWriteResPair<WriteCvtI2PSZ, [SPRPort00], 4, [1], 1, 8>;
defm : X86WriteRes<WriteCvtI2SD, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtI2SS, [SPRPort00_01, SPRPort00_01_05, SPRPort05], 9, [1, 1, 1], 3>;
-defm : X86WriteRes<WriteCvtI2SSLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2I, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2ILd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2ILd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2IY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2IYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2IZ, [SPRPort00, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2IZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IZLd, [SPRPort00, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPD2PS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteCvtPD2PSY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2, 8>;
defm : SPRWriteResPair<WriteCvtPD2PSZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 8>;
defm : X86WriteRes<WriteCvtPH2PS, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SPRPort00_01, SPRPort05], 8, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPH2PSZ, [SPRPort00, SPRPort05], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPS2I, [SPRPort00_01], 4, [1], 1, 7>;
defm : SPRWriteResPair<WriteCvtPS2IY, [SPRPort00_01], 4, [1], 1, 8>;
defm : X86WriteRes<WriteCvtPS2IZ, [SPRPort00, SPRPort00_05, SPRPort05], 10, [1, 2, 1], 4>;
-defm : X86WriteRes<WriteCvtPS2IZLd, [SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05], 18, [1, 2, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteCvtPS2IZLd, [SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05], 18, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteCvtPS2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPS2PDZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 6>;
defm : X86WriteRes<WriteCvtPS2PH, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SPRPort00_01, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1], 3>;
@@ -202,12 +202,12 @@ defm : SPRWriteResPair<WriteCvtSD2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtSD2SS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteCvtSS2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtSS2SD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteDPPD, [SPRPort00_01, SPRPort01_05], 9, [2, 1], 3, 7>;
defm : SPRWriteResPair<WriteDPPS, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 7>;
defm : SPRWriteResPair<WriteDPPSY, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 8>;
-defm : SPRWriteResPair<WriteDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
-defm : SPRWriteResPair<WriteDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv16, [SPRPort00_01_05_06_11, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv32, [SPRPort00_01_05_06_11, SPRPort01], 15, [1, 3], 4, 4>;
defm : SPRWriteResPair<WriteDiv64, [SPRPort01], 18, [3], 3>;
defm : X86WriteRes<WriteDiv8, [SPRPort01], 17, [3], 3>;
defm : X86WriteRes<WriteDiv8Ld, [SPRPort01], 22, [3], 3>;
@@ -235,7 +235,7 @@ defm : SPRWriteResPair<WriteFCmpY, [SPRPort00_01], 4, [1], 1, 8>;
def : WriteRes<WriteFCmpZ, [SPRPort05]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteFCmpZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteFCmpZLd, [SPRPort00, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteFCom, [SPRPort05], 1, [1], 1, 7>;
defm : SPRWriteResPair<WriteFComX, [SPRPort00], 3, [1]>;
defm : SPRWriteResPair<WriteFDiv, [SPRPort00], 11, [1], 1, 7>;
@@ -251,13 +251,13 @@ defm : SPRWriteResPair<WriteFHAddY, [SPRPort01_05, SPRPort05], 5, [1, 2], 3, 8>;
def : WriteRes<WriteFLD0, [SPRPort00_05]>;
defm : X86WriteRes<WriteFLD1, [SPRPort00_05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SPRPort00_05], 1, [2], 2>;
-def : WriteRes<WriteFLoad, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoad, [SPRPort02_03_10]> {
let Latency = 7;
}
-def : WriteRes<WriteFLoadX, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoadX, [SPRPort02_03_10]> {
let Latency = 7;
}
-def : WriteRes<WriteFLoadY, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoadY, [SPRPort02_03_10]> {
let Latency = 8;
}
defm : SPRWriteResPair<WriteFLogic, [SPRPort00_01_05], 1, [1], 1, 7>;
@@ -270,8 +270,8 @@ defm : SPRWriteResPair<WriteFMAZ, [SPRPort00], 4, [1], 1, 8>;
def : WriteRes<WriteFMOVMSK, [SPRPort00]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteFMaskedLoad, [SPRPort00_01_05, SPRPort02_03_11], 8, [1, 1], 2>;
-defm : X86WriteRes<WriteFMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedLoad, [SPRPort00_01_05, SPRPort02_03_10], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_10], 9, [1, 1], 2>;
defm : X86WriteRes<WriteFMaskedStore32, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
defm : X86WriteRes<WriteFMaskedStore64, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
@@ -334,15 +334,15 @@ defm : SPRWriteResPair<WriteFVarShuffleZ, [SPRPort05], 1, [1], 1, 8>;
def : WriteRes<WriteFence, [SPRPort00_06]> {
let Latency = 2;
}
-defm : SPRWriteResPair<WriteIDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
-defm : SPRWriteResPair<WriteIDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv16, [SPRPort00_01_05_06_11, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv32, [SPRPort00_01_05_06_11, SPRPort01], 15, [1, 3], 4, 4>;
defm : SPRWriteResPair<WriteIDiv64, [SPRPort01], 18, [3], 3>;
defm : X86WriteRes<WriteIDiv8, [SPRPort01], 17, [3], 3>;
defm : X86WriteRes<WriteIDiv8Ld, [SPRPort01], 22, [3], 3>;
-defm : SPRWriteResPair<WriteIMul16, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [2, 1, 1], 4>;
-defm : SPRWriteResPair<WriteIMul16Imm, [SPRPort00_01_05_06_10, SPRPort01], 4, [1, 1], 2>;
+defm : SPRWriteResPair<WriteIMul16, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 5, [2, 1, 1], 4>;
+defm : SPRWriteResPair<WriteIMul16Imm, [SPRPort00_01_05_06_11, SPRPort01], 4, [1, 1], 2>;
defm : SPRWriteResPair<WriteIMul16Reg, [SPRPort01], 3, [1]>;
-defm : SPRWriteResPair<WriteIMul32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 3>;
+defm : SPRWriteResPair<WriteIMul32, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 3>;
defm : SPRWriteResPair<WriteIMul32Imm, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteIMul32Reg, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteIMul64, [SPRPort01, SPRPort05], 4, [1, 1], 2>;
@@ -359,10 +359,10 @@ defm : SPRWriteResPair<WriteJump, [SPRPort00_06], 1, [1]>;
def : WriteRes<WriteLAHFSAHF, [SPRPort00_06]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteLDMXCSR, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11], 7, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteLDMXCSR, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10], 7, [1, 1, 1, 1], 4>;
def : WriteRes<WriteLEA, [SPRPort01]>;
defm : SPRWriteResPair<WriteLZCNT, [SPRPort01], 3, [1]>;
-def : WriteRes<WriteLoad, [SPRPort02_03_11]> {
+def : WriteRes<WriteLoad, [SPRPort02_03_10]> {
let Latency = 5;
}
def : WriteRes<WriteMMXMOVMSK, [SPRPort00]> {
@@ -370,7 +370,7 @@ def : WriteRes<WriteMMXMOVMSK, [SPRPort00]> {
}
defm : SPRWriteResPair<WriteMPSAD, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteMPSADY, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 8>;
-defm : SPRWriteResPair<WriteMULX32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 2>;
+defm : SPRWriteResPair<WriteMULX32, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 2>;
defm : SPRWriteResPair<WriteMULX64, [SPRPort01, SPRPort05], 4, [1, 1]>;
def : WriteRes<WriteMicrocoded, [SPRPort00_01_05_06]> {
let Latency = SapphireRapidsModel.MaxLatency;
@@ -380,9 +380,9 @@ def : WriteRes<WriteMove, [SPRPort00]> {
}
defm : X86WriteRes<WriteNop, [], 1, [], 0>;
defm : X86WriteRes<WritePCmpEStrI, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 2, 1, 1, 1], 8>;
-defm : X86WriteRes<WritePCmpEStrILd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 31, [3, 1, 1, 1, 1, 1], 8>;
+defm : X86WriteRes<WritePCmpEStrILd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05], 31, [3, 1, 1, 1, 1, 1], 8>;
defm : X86WriteRes<WritePCmpEStrM, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 3, 1, 1, 1], 9>;
-defm : X86WriteRes<WritePCmpEStrMLd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 17, [3, 2, 1, 1, 1, 1], 9>;
+defm : X86WriteRes<WritePCmpEStrMLd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05], 17, [3, 2, 1, 1, 1, 1], 9>;
defm : SPRWriteResPair<WritePCmpIStrI, [SPRPort00], 11, [3], 3, 20>;
defm : SPRWriteResPair<WritePCmpIStrM, [SPRPort00], 11, [3], 3>;
defm : SPRWriteResPair<WritePHAdd, [SPRPort00_05, SPRPort05], 3, [1, 2], 3, 8>;
@@ -397,16 +397,16 @@ defm : SPRWriteResPair<WritePSADBW, [SPRPort05], 3, [1], 1, 8>;
defm : SPRWriteResPair<WritePSADBWX, [SPRPort05], 3, [1], 1, 7>;
defm : SPRWriteResPair<WritePSADBWY, [SPRPort05], 3, [1], 1, 8>;
defm : SPRWriteResPair<WritePSADBWZ, [SPRPort05], 3, [1], 1, 8>;
-defm : X86WriteRes<WriteRMW, [SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 1, [1, 1, 1], 3>;
-defm : X86WriteRes<WriteRotate, [SPRPort00_01_05_06_10, SPRPort00_06], 2, [1, 2], 3>;
-defm : X86WriteRes<WriteRotateLd, [SPRPort00_01_05_06_10, SPRPort00_06], 12, [1, 2], 3>;
+defm : X86WriteRes<WriteRMW, [SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 1, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteRotate, [SPRPort00_01_05_06_11, SPRPort00_06], 2, [1, 2], 3>;
+defm : X86WriteRes<WriteRotateLd, [SPRPort00_01_05_06_11, SPRPort00_06], 12, [1, 2], 3>;
defm : X86WriteRes<WriteRotateCL, [SPRPort00_06], 2, [2], 2>;
-defm : X86WriteRes<WriteRotateCLLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 19, [2, 3, 2], 7>;
+defm : X86WriteRes<WriteRotateCLLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 19, [2, 3, 2], 7>;
defm : X86WriteRes<WriteSETCC, [SPRPort00_06], 2, [2], 2>;
defm : X86WriteRes<WriteSETCCStore, [SPRPort00_06, SPRPort04_09, SPRPort07_08], 13, [2, 1, 1], 4>;
-defm : X86WriteRes<WriteSHDmrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1, 1], 6>;
-defm : X86WriteRes<WriteSHDmri, [SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1], 5>;
-defm : X86WriteRes<WriteSHDrrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteSHDmrcl, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteSHDmri, [SPRPort00_01_05_06_11, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDrrcl, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 5, [1, 1, 1], 3>;
def : WriteRes<WriteSHDrri, [SPRPort01]> {
let Latency = 3;
}
@@ -434,7 +434,7 @@ defm : SPRWriteResPair<WriteVarBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
defm : SPRWriteResPair<WriteVarBlendZ, [SPRPort00_05], 1, [...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117468
More information about the llvm-commits
mailing list