[llvm] [X86][Codegen] add sched for lnlp (PR #139446)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 11 04:49:11 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Mahesh-Attarde (mahesh-attarde)

<details>
<summary>Changes</summary>

This patch introduces a scheduling model for the Lunar Lake P-core (`LunarlakePModel`) and switches the `lunarlake` CPU definition from `AlderlakePModel` to it.


---

Patch is 1.12 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139446.diff


40 Files Affected:

- (modified) llvm/lib/Target/X86/X86.td (+4-1) 
- (added) llvm/lib/Target/X86/X86SchedLunarlakeP.td (+2409) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/independent-load-stores.s (+149) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/partially-overlapping-groups.s (+21) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-aes.s (+69) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-avx1.s (+2429) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-avx2.s (+1093) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-avxgfni.s (+83) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-avxvnni.s (+97) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-bmi2.s (+153) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-clflushopt.s (+45) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-clwb.s (+45) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-cmov.s (+335) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-cmpxchg.s (+54) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-f16c.s (+69) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-fma.s (+713) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-fsgsbase.s (+69) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-gfni.s (+62) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-lea.s (+449) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-lzcnt.s (+62) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-mmx.s (+405) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-movbe.s (+62) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-pclmul.s (+48) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-popcnt.s (+62) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-prefetchw.s (+48) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-rdrand.s (+51) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-rdseed.s (+51) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-sse1.s (+473) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-sse2.s (+972) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-sse3.s (+116) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-sse41.s (+378) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-sse42.s (+111) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-ssse3.s (+265) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-vaes.s (+69) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-vpclmulqdq.s (+48) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-x86_32.s (+90) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-x86_64.s (+2887) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-x87.s (+533) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-xsave.s (+61) 
- (added) llvm/test/tools/llvm-mca/X86/LunarlakeP/zero-idioms.s (+503) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 2d635835e3ff7..a31bde85e4d02 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -811,6 +811,7 @@ include "X86SchedSkylakeClient.td"
 include "X86SchedSkylakeServer.td"
 include "X86SchedIceLake.td"
 include "X86SchedAlderlakeP.td"
+include "X86SchedLunarlakeP.td"
 include "X86SchedSapphireRapids.td"
 
 //===----------------------------------------------------------------------===//
@@ -1862,10 +1863,12 @@ def : ProcModel<"meteorlake", AlderlakePModel,
                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
 def : ProcModel<"arrowlake", AlderlakePModel,
                 ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
-foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
+foreach P = ["arrowlake-s", "arrowlake_s"] in {
 def : ProcModel<P, AlderlakePModel,
                 ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
 }
+def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
+                ProcessorFeatures.ADLTuning>;
 def : ProcModel<"pantherlake", AlderlakePModel,
                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
 def : ProcModel<"clearwaterforest", AlderlakePModel,
diff --git a/llvm/lib/Target/X86/X86SchedLunarlakeP.td b/llvm/lib/Target/X86/X86SchedLunarlakeP.td
new file mode 100644
index 0000000000000..e82d7ab805077
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SchedLunarlakeP.td
@@ -0,0 +1,2409 @@
+//=----- X86SchedLunarlakeP.td - X86 LunarlakeP Scheduling *- tablegen -----*=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for LunarlakeP to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+def LunarlakePModel : SchedMachineModel {
+  // LunarlakeP can allocate 8 uops per cycle.
+  // Max micro-ops that may be scheduled per cycle.
+  // Based on Allocator Width
+  let IssueWidth = 8;  
+  // Max micro-ops that can be buffered. 
+  // Based on size of ROB
+  let MicroOpBufferSize = 792;
+  // INT LOAD takes 4 cycles
+  let LoadLatency = 4;
+  let MispredictPenalty = 14;
+  // Latency for microcoded instructions or instructions without latency info.
+  int MaxLatency = 100;
+  // Based on the LSD (loop-stream detector) queue size (ST).
+  // The LSD holds 200 uops per logical processor in single-threaded mode.
+  // With SMT it would be 100 uops/thread, but Lion Cove removed SMT in HW.
+  let LoopMicroOpBufferSize = 200;
+  // This flag is set to allow the scheduler to assign a default model to
+  // unrecognized opcodes.
+  let CompleteModel = 0;
+}
+
+let SchedModel = LunarlakePModel in {
+
+// LunarlakeP can issue micro-ops to 18 different ports in one cycle.
+// The Lion Cove architectural spec uses port names that are not sequential;
+// for readability we use sequential names here, since these ports only
+// carry logical information for the scheduler.
+// 6 INT ALU Ports {P0 to P5}
+def LNLPPort00 : ProcResource<1>;
+def LNLPPort01 : ProcResource<1>;
+def LNLPPort02 : ProcResource<1>;
+def LNLPPort03 : ProcResource<1>;
+def LNLPPort04 : ProcResource<1>;
+def LNLPPort05 : ProcResource<1>;
+// 4 VEC ALU Ports {V0 to V3}
+def LNLPVPort00 : ProcResource<1>;
+def LNLPVPort01 : ProcResource<1>;
+def LNLPVPort02 : ProcResource<1>;
+def LNLPVPort03 : ProcResource<1>;
+// 2 Store Data Ports {P10 to P11}
+def LNLPPort10 : ProcResource<1>;
+def LNLPPort11 : ProcResource<1>;
+// 6 MEM ports: 6 AGUs, of which 3 serve loads (LD) and 3 serve stores (ST)
+// AGU LD {P20 to P22}
+def LNLPPort20 : ProcResource<1>;
+def LNLPPort21 : ProcResource<1>;
+def LNLPPort22 : ProcResource<1>;
+// AGU ST {P25 to P27}
+def LNLPPort25 : ProcResource<1>;
+def LNLPPort26 : ProcResource<1>;
+def LNLPPort27 : ProcResource<1>;
+
+// Workaround to represent invalid ports. WriteRes shouldn't use this resource.
+def LNLPPortInvalid :ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def LNLPVPort00_01          : ProcResGroup<[LNLPVPort00, LNLPVPort01]>;
+def LNLPVPort02_03          : ProcResGroup<[LNLPVPort02, LNLPVPort03]>;
+def LNLPPort00_02_04        : ProcResGroup<[LNLPPort00, LNLPPort02, LNLPPort04]>;
+def LNLPPort01_03_05        : ProcResGroup<[LNLPPort01, LNLPPort03, LNLPPort05]>;
+def LNLPPort20_21_22        : ProcResGroup<[LNLPPort20, LNLPPort21, LNLPPort22]>;
+def LNLPPort25_26_27        : ProcResGroup<[LNLPPort25, LNLPPort26, LNLPPort27]>;
+
+// INT EU has 112 reservation stations (NOTE: BufferSize below is 110; confirm).
+def LNLPPort00_01_02_03_04_05 : ProcResGroup<[LNLPPort00, LNLPPort01, LNLPPort02,
+                                             LNLPPort03, LNLPPort04, LNLPPort05]>{ 
+  let BufferSize = 110;  // Reduced from 128 in GLC
+}
+
+// VEC EU has 180 reservation stations.
+def LNLPVPort00_01_02_03    : ProcResGroup<[LNLPVPort00, LNLPVPort01, LNLPVPort02,
+                                          LNLPVPort03]>{
+  let BufferSize = 180; // EUs for INT and VEC are separated.
+                        // VEC QUEUE SIZE = 60 + VEC EU RS (60+60)
+}
+// STD has 48 reservation stations.
+def LNLPPort10_11          : ProcResGroup<[LNLPPort10, LNLPPort11]> {
+  let BufferSize = 48;
+}
+
+// MEM has 72 reservation stations.
+def LNLPPort20_21_22_25_26_27 : ProcResGroup<[LNLPPort20, LNLPPort21, LNLPPort22,
+                                            LNLPPort25, LNLPPort26, LNLPPort27]> {
+  let BufferSize = 72;
+}
+
+def LNLPPortAny : ProcResGroup<[LNLPPort00, LNLPPort01, LNLPPort02, LNLPPort03,
+                               LNLPPort04, LNLPPort05, LNLPVPort00, LNLPVPort01,
+                               LNLPVPort02, LNLPVPort03, LNLPPort10, LNLPPort11,
+                               LNLPPort20, LNLPPort21, LNLPPort22, LNLPPort25,
+                               LNLPPort26, LNLPPort27]>;
+
+// Integer loads are 4 cycles, so ReadAfterLd registers needn't be available
+// until 4 cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// TODO: 6 Cycle latency for Vec load comes from ADL
+// Vector loads are 6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 6>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass LNLPWriteResPair<X86FoldableSchedWrite SchedRW,
+                           list<ProcResourceKind> ExePorts,
+                           int Lat, list<int> Res = [1], int UOps = 1,
+                           int LoadLat = 4, int LoadUOps = 1> {
+  // Register variant is using a single cycle on ExePort.
+  def : WriteRes<SchedRW, ExePorts> {
+    let Latency = Lat;
+    let ReleaseAtCycles = Res;
+    let NumMicroOps = UOps;
+  }
+
+  // Memory variant also uses a cycle on port 20/21/22 and adds LoadLat cycles to
+  // the latency (default = 4).
+  def : WriteRes<SchedRW.Folded, !listconcat([LNLPPort20_21_22], ExePorts)> {
+    let Latency = !add(Lat, LoadLat);
+    let ReleaseAtCycles = !listconcat([1], Res);
+    let NumMicroOps = !add(UOps, LoadUOps);
+  }
+}
+
+defm : X86WriteResUnsupported<WriteBEXTRLd>;
+
+//===----------------------------------------------------------------------===//
+// The following definitions are inferred by smg.
+//===----------------------------------------------------------------------===//
+
+def : WriteRes<WriteADC, [LNLPPort00_02_04]>;
+defm : X86WriteRes<WriteADCLd, [LNLPPort00_01_02_03_04_05, LNLPPort00_02_04], 11, [1, 1], 2>;
+def : WriteRes<WriteAESDecEnc, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 3;
+}
+defm : X86WriteRes<WriteAESDecEncLd, [LNLPVPort00_01, LNLPPort20_21_22], 11, [4, 7], 2>;
+defm : X86WriteResPairUnsupported<WriteAESIMC>;
+defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
+def  : WriteRes<WriteALU, [LNLPPort01_03_05]>;
+def  : WriteRes<WriteALULd, [LNLPPort01_03_05, LNLPPort20_21_22]> {
+  let Latency = 11;
+}
+defm : X86WriteRes<WriteBEXTR, [LNLPPort01_03_05], 6, [2], 2>;
+def  : WriteRes<WriteBLS, [LNLPPort01_03_05]>;
+defm : X86WriteRes<WriteBLSLd, [LNLPPort01_03_05, LNLPPort20_21_22], 5, [1, 4], 2>;
+defm : LNLPWriteResPair<WriteBSF, [LNLPPort01_03_05], 3, [1]>;
+defm : LNLPWriteResPair<WriteBSR, [LNLPPort01_03_05], 3, [1]>;
+def  : WriteRes<WriteBSWAP32, [LNLPPort01_03_05]>;
+defm : X86WriteRes<WriteBSWAP64, [LNLPPort01_03_05, LNLPPort01_03_05], 2, [1, 1], 2>;
+defm : LNLPWriteResPair<WriteBZHI, [LNLPPort01_03_05], 3, [1]>;
+def : WriteRes<WriteBitTestSet, [LNLPPort01_03_05]>;
+def : WriteRes<WriteBitTestSetImmLd, [LNLPPort01_03_05]> {
+  let Latency = 11;
+}
+defm : X86WriteRes<WriteBitTestSetRegLd, [LNLPPort00_01_02_03_04_05, LNLPPort00_02_04, LNLPPort00_02_04, LNLPPort01_03_05], 17, [3, 2, 1, 2], 8>;
+def : WriteRes<WriteBitTest, [LNLPPort01_03_05]>;
+defm : X86WriteRes<WriteBitTestImmLd, [LNLPPort01_03_05, LNLPPort20_21_22], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [LNLPPort00_01_02_03_04_05, LNLPPort00_02_04, LNLPPort00_02_04, LNLPPort01_03_05, LNLPPort20_21_22], 11, [4, 2, 1, 2, 1], 10>;
+
+def  : WriteRes<WriteBlend, [LNLPVPort00_01_02_03]>;
+defm : X86WriteRes<WriteBlendLd, [LNLPVPort00_01_02_03, LNLPPort20_21_22], 7, [1, 6], 2>;
+def  : WriteRes<WriteBlendY, [LNLPVPort00_01_02_03]>;
+defm : X86WriteRes<WriteBlendYLd, [LNLPVPort00_01_02_03, LNLPPort20_21_22], 8, [1, 7], 2>;
+
+defm : LNLPWriteResPair<WriteCLMul, [LNLPVPort02_03], 3, [1], 1, 7>;
+def : WriteRes<WriteCMOV, [LNLPPort00_01_02_03_04_05]>;
+def : WriteRes<WriteCMOVLd, [LNLPPort00_01_02_03_04_05, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [1, 4];
+  let Latency = 5;
+}
+defm : X86WriteRes<WriteCMPXCHG, [LNLPPort00_01_02_03_04_05, LNLPPort00_02_04], 3, [3, 2], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [LNLPPort00_01_02_03_04_05, LNLPPort00_02_04, LNLPPort20_21_22, LNLPPort10_11, LNLPPort20_21_22], 12, [1, 2, 1, 1, 1], 6>;
+
+def  : WriteRes<WriteCRC32, [LNLPPort01_03_05]> {
+  let ReleaseAtCycles = [3];
+  let Latency = 3;
+}
+def  : WriteRes<WriteCRC32Ld, [LNLPPort01_03_05, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [3, 4];
+  let Latency = 7;
+}
+
+defm : X86WriteRes<WriteCvtI2PD, [LNLPVPort00_01, LNLPVPort02_03], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd, [LNLPVPort00_01, LNLPPort20_21_22], 11, [1,1], 2>; 
+defm : X86WriteRes<WriteCvtI2PDY, [LNLPVPort00_01, LNLPVPort02_03], 7, [4, 3], 2>;
+def  : WriteRes<WriteCvtI2PDYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+def  : WriteRes<WriteCvtI2PS, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteCvtI2PSLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+defm : LNLPWriteResPair<WriteCvtI2PSY, [LNLPVPort00_01], 4, [1], 1, 8>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+defm : X86WriteRes<WriteCvtI2SD, [LNLPPort01_03_05, LNLPVPort00_01], 8, [4, 4], 2>;
+def  : WriteRes<WriteCvtI2SDLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+defm : X86WriteRes<WriteCvtI2SS, [LNLPPort01_03_05, LNLPVPort00_01], 8, [4, 4], 2>;
+def  : WriteRes<WriteCvtI2SSLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+
+defm : LNLPWriteResPair<WriteCvtPD2I, [LNLPVPort00_01, LNLPVPort02_03], 5, [1, 1], 2, 7>;
+defm : LNLPWriteResPair<WriteCvtPD2IY, [LNLPVPort00_01, LNLPVPort02_03], 7, [1, 1], 2, 8>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+defm : LNLPWriteResPair<WriteCvtPD2PS, [LNLPVPort00_01, LNLPVPort02_03], 5, [1, 1], 2, 7>;
+defm : LNLPWriteResPair<WriteCvtPD2PSY, [LNLPVPort00_01, LNLPVPort02_03], 7, [1, 1], 2, 8>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+defm : X86WriteRes<WriteCvtPH2PS, [LNLPVPort00_01, LNLPVPort02_03], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [LNLPVPort00_01, LNLPPort20_21_22], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [LNLPVPort00_01, LNLPVPort02_03], 7, [4, 3], 2>;
+def  : WriteRes<WriteCvtPH2PSYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
+def  : WriteRes<WriteCvtPS2I, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+defm : X86WriteRes<WriteCvtPS2ILd, [LNLPVPort00_01, LNLPPort20_21_22], 10, [4, 6], 2>;
+def   : WriteRes<WriteCvtPS2IY, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteCvtPS2IYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+defm : X86WriteRes<WriteCvtPS2PD, [LNLPVPort00_01, LNLPVPort02_03], 5, [4, 1], 2>;
+def  : WriteRes<WriteCvtPS2PDLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+defm : X86WriteRes<WriteCvtPS2PDY, [LNLPVPort00_01, LNLPVPort02_03], 7, [4, 3], 2>;
+def  : WriteRes<WriteCvtPS2PDYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+defm : X86WriteRes<WriteCvtPS2PH, [LNLPVPort00_01, LNLPVPort02_03], 5, [4, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [LNLPVPort00_01, LNLPPort10_11, LNLPPort25_26_27], 5, [4, 1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [LNLPVPort00_01, LNLPVPort02_03], 7, [4, 3], 2>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [LNLPVPort00_01, LNLPPort10_11, LNLPPort25_26_27], 5, [4, 1, 1], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+defm : X86WriteRes<WriteCvtSD2I, [LNLPVPort00_01], 7, [7], 2>;
+defm : X86WriteRes<WriteCvtSD2ILd, [LNLPVPort00_01, LNLPPort20_21_22], 11, [7, 4], 3>;
+defm : X86WriteRes<WriteCvtSD2SS, [LNLPVPort00_01, LNLPVPort02_03], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtSD2SSLd, [LNLPVPort00_01, LNLPVPort02_03, LNLPPort20_21_22], 11, [4, 1, 6], 3>;
+defm : X86WriteRes<WriteCvtSS2I, [LNLPVPort00_01], 7, [7], 2>;
+defm : X86WriteRes<WriteCvtSS2ILd, [LNLPVPort00_01, LNLPPort20_21_22], 11, [7, 4], 2>;
+defm : X86WriteRes<WriteCvtSS2SD, [LNLPVPort00_01, LNLPVPort02_03], 5, [4, 1], 2>;
+def  : WriteRes<WriteCvtSS2SDLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+defm : X86WriteRes<WriteDPPD, [LNLPVPort00_01, LNLPVPort02_03], 8, [2, 1], 3>;
+defm : X86WriteRes<WriteDPPDLd, [LNLPVPort00_01, LNLPVPort02_03, LNLPPort20_21_22], 13, [2, 1, 1], 4>;
+//defm : X86WriteRes<WriteDPPS, [LNLPVPort00_01, LNLPVPort02_03], 99, [7, 5], 5>;
+// FIXME: Incomplete SchedWrite.
+//defm : X86WriteResUnsupported<WriteDPPSLd>;
+defm : LNLPWriteResPair<WriteDPPS, [LNLPVPort00_01, LNLPVPort02_03, LNLPVPort02_03, LNLPVPort02_03, LNLPVPort00_01], 11, [2, 1, 1, 1, 1], 5, 7>;
+//defm : X86WriteRes<WriteDPPSY, [LNLPVPort00_01, LNLPVPort02_03], 12, [7, 5], 5>;
+// FIXME: Incomplete SchedWrite.
+//defm : X86WriteResUnsupported<WriteDPPSYLd>;
+defm : LNLPWriteResPair<WriteDPPSY, [LNLPVPort00_01, LNLPVPort02_03, LNLPVPort02_03, LNLPVPort02_03, LNLPVPort00_01], 12, [2, 1, 1, 1, 1], 5, 7>;
+
+defm : LNLPWriteResPair<WriteDiv16, [LNLPPort00_01_02_03_04_05, LNLPPort01], 16, [1, 3], 4, 4>;
+defm : LNLPWriteResPair<WriteDiv32, [LNLPPort00_01_02_03_04_05, LNLPPort01], 15, [1, 3], 4, 4>;
+defm : LNLPWriteResPair<WriteDiv64, [LNLPPort01], 18, [3], 3>;
+defm : X86WriteRes<WriteDiv8, [LNLPPort01], 17, [3], 3>;
+defm : X86WriteRes<WriteDiv8Ld, [LNLPPort01], 22, [3], 3>;
+defm : X86WriteRes<WriteEMMS, [LNLPPort01, LNLPVPort00_01, LNLPVPort02_03], 10, [1, 8, 1], 10>;
+def  : WriteRes<WriteFAdd, [LNLPVPort02_03]> {
+  let Latency = 2;
+}
+// FIXME: Latency
+defm : X86WriteRes<WriteFAddLd, [LNLPVPort02_03, LNLPPort20_21_22], 10, [1,1], 2>;  // 8
+defm : LNLPWriteResPair<WriteFAdd64, [LNLPVPort02_03], 3, [1], 1, 7>;
+defm : LNLPWriteResPair<WriteFAdd64X, [LNLPVPort02_03], 3, [1], 1, 7>;
+defm : LNLPWriteResPair<WriteFAdd64Y, [LNLPVPort02_03], 3, [1], 1, 8>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
+defm : LNLPWriteResPair<WriteFAddX, [LNLPVPort02_03], 3, [1], 1, 7>;
+defm : LNLPWriteResPair<WriteFAddY, [LNLPVPort02_03], 3, [1], 1, 8>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
+def  : WriteRes<WriteFBlend, [LNLPVPort00_01_02_03]>;
+defm : X86WriteRes<WriteFBlendLd, [LNLPVPort00_01_02_03, LNLPPort20_21_22], 7, [1, 6], 2>;
+def  : WriteRes<WriteFBlendY, [LNLPVPort00_01_02_03]>;
+defm : X86WriteRes<WriteFBlendYLd, [LNLPVPort00_01_02_03, LNLPPort20_21_22], 8, [1, 7], 2>;
+def : WriteRes<WriteFCMOV, [LNLPVPort00_01]> {
+  let Latency = 3;
+}
+def : WriteRes<WriteFCmp, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmpLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+def : WriteRes<WriteFCmp64, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmp64Ld, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+def : WriteRes<WriteFCmp64X, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmp64XLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+def : WriteRes<WriteFCmp64Y, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmp64YLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
+def : WriteRes<WriteFCmpX, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmpXLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 6];
+  let Latency = 10;
+}
+def : WriteRes<WriteFCmpY, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [4];
+  let Latency = 4;
+}
+def : WriteRes<WriteFCmpYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [4, 7];
+  let Latency = 11;
+}
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
+def : WriteRes<WriteFCom, [LNLPVPort00_01]>;
+defm : X86WriteRes<WriteFComLd, [LNLPVPort00_01, LNLPPort20_21_22], 8, [1, 1], 2>;
+def  : WriteRes<WriteFComX, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [3];
+  let Latency = 3;
+}
+defm : X86WriteRes<WriteFComXLd, [LNLPVPort00_01, LNLPPort20_21_22], 9, [3, 6], 2>;
+def  : WriteRes<WriteFDiv, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [7];
+  let Latency = 7;
+}
+def : WriteRes<WriteFDivLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [7, 6];
+  let Latency = 13;
+}
+def : WriteRes<WriteFDiv64, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [10];
+  let Latency = 10;
+}
+def : WriteRes<WriteFDiv64Ld, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [10, 6];
+  let Latency = 16;
+}
+def : WriteRes<WriteFDiv64X, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [10];
+  let Latency = 10;
+}
+def : WriteRes<WriteFDiv64XLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [10, 6];
+  let Latency = 16;
+}
+def : WriteRes<WriteFDiv64Y, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [10];
+  let Latency = 10;
+}
+def : WriteRes<WriteFDiv64YLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [10, 7];
+  let Latency = 17;
+}
+defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+def : WriteRes<WriteFDivX, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [7];
+  let Latency = 7;
+}
+def : WriteRes<WriteFDivXLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [7, 6];
+  let Latency = 13;
+}
+def : WriteRes<WriteFDivY, [LNLPVPort00_01]> {
+  let ReleaseAtCycles = [7];
+  let Latency = 7;
+}
+def : WriteRes<WriteFDivYLd, [LNLPVPort00_01, LNLPPort20_21_22]> {
+  let ReleaseAtCycles = [7, 7];
+  let Latenc...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/139446


More information about the llvm-commits mailing list