[llvm] [AArch64] Neoverse V1 scheduling info (PR #126707)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 02:11:27 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Julien Villette (jvillette38)

<details>
<summary>Changes</summary>

This PR fixes the scheduling model for the Neoverse V1. All information is taken from the Neoverse V1 Software Optimisation Guide:

https://developer.arm.com/documentation/pjdoc466751330-9685/6-0

Changes:
- micro-operations are reduced to a maximum of 3 and respect the maximum number of issues.
- use ReleaseAtCycles to specify throughput
- fix bypass latencies
- fix some latencies/throughputs

Consider conflicts between SVE and ASIMD instructions.
    Software Optimization Guide:
    Maximum issue bandwidth is sustained using one of the following combinations:
    • 2 SVE Uops.
    • 4 ASIMD Uops.
    • 1 SVE Uop on V0 and 2 ASIMD Uops on V13.
    • 1 SVE Uop on V1 and 2 ASIMD Uops on V02.

This merge request depends on #<!-- -->126703 because of the new test: llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s.
This test reports all scheduling information changes from this patch when compared with the version in #<!-- -->126703.

@<!-- -->Rin18 may be interested.

---

Patch is 2.95 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126707.diff


11 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (+797-598) 
- (modified) llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td (+43) 
- (modified) llvm/lib/Target/AArch64/AArch64SchedPredicates.td (+31-3) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s (+3-3) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s (+388-388) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s (+50-50) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s (+405-405) 
- (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s (+7591) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s (+3316-3316) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s (+900-898) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s (+3-3) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 368665467859f5f..99ca28bc4151dad 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -66,6 +66,11 @@ def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1,
 def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;   // FP/ASIMD 0/1 units
 def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;   // FP/ASIMD 0/2 units
 def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;   // FP/ASIMD 1/3 units
+// Select V0 + V2 or V1 + V3 by issuing 2 micro operations
+def V1UnitSVE01 : ProcResGroup<[V1UnitV0, V1UnitV1,   // FP/ASIMD 0,2/1,3 units
+				V1UnitV2, V1UnitV3]>;
+def V1UnitSVE0 : ProcResGroup<[V1UnitV0, V1UnitV2]>;  // FP/ASIMD 0,2 units
+def V1UnitSVE1 : ProcResGroup<[V1UnitV1, V1UnitV3]>;  // FP/ASIMD 1,3 units
 
 // Define commonly used read types.
 
@@ -98,377 +103,487 @@ def V1Write_0c_0Z : SchedWriteRes<[]>;
 
 def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>   { let Latency = 1; }
 def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>   { let Latency = 1; }
-def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]>   { let Latency = 1; }
+def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]>   { let Latency = 1;
+								 let NumMicroOps = 2; }
 def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 4; }
+def V1Write_4c3_1L     : SchedWriteRes<[V1UnitL]>   { let Latency = 4;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_5c3_1L     : SchedWriteRes<[V1UnitL]>   { let Latency = 5;
+						      let ReleaseAtCycles = [3]; }
+
 def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 6; }
+def V1Write_6c2_1L     : SchedWriteRes<[V1UnitL]>   { let Latency = 6;
+						      let ReleaseAtCycles = [2]; }
+def V1Write_6c3_1L     : SchedWriteRes<[V1UnitL]>   { let Latency = 6;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_7c4_1L     : SchedWriteRes<[V1UnitL]>   { let Latency = 7;
+						      let ReleaseAtCycles = [4]; }
 def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
 def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
 def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
 def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 2; }
-def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]>   { let Latency = 2; }
+def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]>   { let Latency = 2;
+								 let NumMicroOps = 2; }
 def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 3; }
-def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 4; }
+def V1Write_4c6_1M     : SchedWriteRes<[V1UnitM]>   { let Latency = 4;
+						      let ReleaseAtCycles = [6]; }
 def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 1; }
 def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 2; }
+def V1Write_2c2_1M0    : SchedWriteRes<[V1UnitM0]>  { let Latency = 2;
+						      let ReleaseAtCycles = [2]; }
+def V1Write_3c2_1M0    : SchedWriteRes<[V1UnitM0]>  { let Latency = 3;
+						      let ReleaseAtCycles = [2]; }
 def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 3; }
 def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 5; }
-def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
-                                                      let ReleaseAtCycles = [5]; }
-def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
-                                                      let ReleaseAtCycles = [5]; }
+def V1Write_12c12_1M0  : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
+						      let ReleaseAtCycles = [12]; }
+def V1Write_20c20_1M0  : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
+						      let ReleaseAtCycles = [20]; }
 def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 2; }
+def V1Write_2c4_1V     : SchedWriteRes<[V1UnitV]>   { let Latency = 2;
+						      let ReleaseAtCycles = [4]; }
 def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 3; }
 def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 4; }
+def V1Write_4c2_1V     : SchedWriteRes<[V1UnitV]>   { let Latency = 4;
+						      let ReleaseAtCycles = [2]; }
 def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 5; }
+def V1Write_6c3_1V     : SchedWriteRes<[V1UnitV]>   { let Latency = 6;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_12c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 12;
+						       let NumMicroOps = 2;
+						       let ReleaseAtCycles = [4]; }
+def V1Write_14c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 14;
+						       let NumMicroOps = 2;
+						       let ReleaseAtCycles = [4]; }
+
 def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 2; }
+def V1Write_2c_1SVE0   : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]>	 { let Latency = 2;
+								   let NumMicroOps = 2; }
 def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 3; }
+def V1Write_3c_1SVE0   : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]>	 { let Latency = 3;
+								   let NumMicroOps = 2; }
 def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 4; }
-def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 6; }
-def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 10;
-                                                      let ReleaseAtCycles = [7]; }
-def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 12;
-                                                      let ReleaseAtCycles = [7]; }
-def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>  { let Latency = 13;
-                                                      let ReleaseAtCycles = [10]; }
-def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 15;
-                                                      let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 16;
-                                                      let ReleaseAtCycles = [7]; }
-def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 20;
-                                                      let ReleaseAtCycles = [7]; }
+def V1Write_4c_1SVE0   : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]>	 { let Latency = 4;
+								   let NumMicroOps = 2; }
+def V1Write_5c4_1SVE0  : SchedWriteRes<[V1UnitSVE0]>  { let Latency = 5;
+							let ReleaseAtCycles = [4];
+							let NumMicroOps = 2; }
+def V1Write_6c_1SVE0	: SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]>  { let Latency = 6;
+								    let NumMicroOps = 2; }
+def V1Write_6c4_1SVE0	: SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]>  { let Latency = 6;
+								    let NumMicroOps = 2;
+								    let ReleaseAtCycles = [4,4]; }
+def V1Write_10c18_1SVE0	: SchedWriteRes<[V1UnitSVE0]>  { let Latency = 10;
+							 let NumMicroOps = 2;
+							 let ReleaseAtCycles = [18]; }
+def V1Write_11c20_1SVE0	 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 11;
+							 let NumMicroOps = 2;
+							 let ReleaseAtCycles = [20]; }
+def V1Write_12c22_1SVE0	: SchedWriteRes<[V1UnitSVE0]>  { let Latency = 12;
+							 let NumMicroOps = 2;
+							 let ReleaseAtCycles = [22]; }
+def V1Write_13c24_1SVE0 : SchedWriteRes<[V1UnitSVE0]>  { let Latency = 13;
+							 let NumMicroOps = 2;
+							 let ReleaseAtCycles = [24]; }
+def V1Write_15c28_1SVE0	: SchedWriteRes<[V1UnitSVE0]>  { let Latency = 15;
+							 let NumMicroOps = 2;
+							let ReleaseAtCycles = [28]; }
+def V1Write_16c28_1SVE0	: SchedWriteRes<[V1UnitSVE0]>  { let Latency = 16;
+							 let NumMicroOps = 2;
+							 let ReleaseAtCycles = [28]; }
+def V1Write_19c36_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 19;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [36]; }
+def V1Write_20c40_1SVE0	: SchedWriteRes<[V1UnitSVE0]> { let Latency = 20;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [40]; }
+
 def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
+def V1Write_2c_1SVE01  : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 2;
+								    let NumMicroOps = 2; }
 def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
-def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
-def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Write_3c_1SVE01  : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 3;
+								    let NumMicroOps = 2; }
+def V1Write_4c_1SVE01  : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+								    let NumMicroOps = 2; }
+def V1Write_4c2_1V01   : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+						      let ReleaseAtCycles = [2]; }
+def V1Write_4c3_1V01   : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_6c3_1V01   : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_6c5_1V01   : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+						      let ReleaseAtCycles = [5]; }
+def V1Write_8c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 8;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [6]; }
+def V1Write_9c8_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 9;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [8]; }
+def V1Write_12c8_1SVE01: SchedWriteRes<[V1UnitSVE01]> { let Latency = 12;
+							let ReleaseAtCycles = [8];
+							let NumMicroOps = 2; }
+def V1Write_13c6_1SVE01	 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 13;
+							  let ReleaseAtCycles = [12];
+							  let NumMicroOps = 2; }
+def V1Write_11c10_1SVE01  : SchedWriteRes<[V1UnitSVE01]> { let Latency = 11;
+							   let NumMicroOps = 2;
+							   let ReleaseAtCycles = [10]; }
 def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
 def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
+def V1Write_4c2_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 4;
+						      let ReleaseAtCycles = [2]; }
+def V1Write_6c4_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 6;
+						      let ReleaseAtCycles = [4]; }
+def V1Write_7c2_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
+						      let ReleaseAtCycles = [2]; }
 def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
                                                       let ReleaseAtCycles = [7]; }
-def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
-                                                      let ReleaseAtCycles = [7]; }
-def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+def V1Write_9c3_1V02  : SchedWriteRes<[V1UnitV02]>  { let Latency = 9;
+						      let ReleaseAtCycles = [2]; }
+def V1Write_10c3_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+						      let ReleaseAtCycles = [3]; }
+def V1Write_10c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
                                                       let ReleaseAtCycles = [5]; }
-def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
-                                                      let ReleaseAtCycles = [11]; }
+def V1Write_10c9_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+						      let ReleaseAtCycles = [9]; }
+def V1Write_13c13_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+						      let ReleaseAtCycles = [13]; }
 def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
                                                       let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
-                                                      let ReleaseAtCycles = [7]; }
+def V1Write_15c14_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
+						      let ReleaseAtCycles = [14]; }
+def V1Write_16c8_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+						      let ReleaseAtCycles = [8]; }
+def V1Write_16c15_1V02	: SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+						      let ReleaseAtCycles = [15]; }
 def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 2; }
-def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 3; }
-def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 4; }
+def V1Write_2c_1SVE1   : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]>	 { let Latency = 2;
+								   let NumMicroOps = 2; }
+def V1Write_3c_1SVE1   : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]>	 { let Latency = 3;
+								   let NumMicroOps = 2; }
+def V1Write_4c_1SVE1   : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]>	 { let Latency = 4;
+								   let NumMicroOps = 2; }
+def V1Write_8c4_1SVE1  : SchedWriteRes<[V1UnitSVE1]>  { let Latency = 8;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [4]; }
+def V1Write_10c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]>  { let Latency = 10;
+							let NumMicroOps = 2;
+							let ReleaseAtCycles = [4]; }
 def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
 def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
+def V1Write_4c2_1V13   : SchedWriteRes<[V1UnitV13]> { let Latency = 4;
+						      let ReleaseAtCycles = [2]; }
+
 
 //===----------------------------------------------------------------------===//
 // Define generic 2 micro-op types
 
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 11, NumMicroOps = 2 in
-def V1Write_11c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1M0_1V0    : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V01    : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1V1    : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
+def V1Write_1c_1B_1S	 : SchedWriteRes<[V1UnitB, V1UnitS]> {
+    let Latency = 1;
+    let NumMicroOps = 2;
+}
 
-//===----------------------------------------------------------------------===//
-// Define generic 3 micro-op types
+def V1Write_6c_1B_1M0	 : SchedWriteRes<[V1UnitB, V1UnitM0]> {
+    let Latency = 6;
+    let NumMicroOps = 2;
+}
 
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
-let Latency = 7, NumMicroOps = 3 in
-def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_5c_1I_1L	 : SchedWriteRes<[V1UnitI, V1UnitL]> {
+    let Latency = 5;
+    let NumMicroOps = 2;
+}
 
-//===----------------------------------------------------------------------===//
-// Define generic 4 micro-op types
-
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2M0_2V0   : SchedWriteRes<[V1UnitM0, V1UnitM0,
-                                          V1UnitV0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 4 in
-def V1Write_7c_4L        : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
-                                             V1UnitV, V1UnitV]>;
-let Latency = 9, NumMicroOps = 4 in
-def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
-                                             V1UnitV, V1UnitV]>;
-let Latency = 11, NumMicroOps = 4 in
-def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
-                                             V1UnitV, V1UnitV]>;
-let Latency = 10, NumMicroOps = 4 in
-def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
-                                             V1UnitV, V1UnitV]>;
-let Latency = 2, NumMicroOps = 4 in
-def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
-                                             V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 4 in
-def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
-                                             V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
-                              ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/126707


More information about the llvm-commits mailing list