[llvm] [AArch64] Neoverse V1 scheduling info (PR #126707)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 02:11:27 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Julien Villette (jvillette38)
<details>
<summary>Changes</summary>
This PR fixes the scheduling model for the Neoverse V1. All information is taken from the Neoverse V1 Software Optimisation Guide:
https://developer.arm.com/documentation/pjdoc466751330-9685/6-0
Changes:
- micro-operations are reduced to a maximum of 3 and respect the maximum number of issues.
- use ReleaseAtCycles to specify throughput
- fix bypass latencies
- fix some latencies/throughput
This PR also takes conflicts between SVE and ASIMD instructions into account.
Software Optimization Guide:
Maximum issue bandwidth is sustained using one of the following combinations:
• 2 SVE Uops.
• 4 ASIMD Uops.
• 1 SVE Uop on V0 and 2 ASIMD Uops on VX13.
• 1 SVE Uop on V1 and 2 ASIMD Uops on V02.
This merge request depends on #<!-- -->126703 because of the new test: llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s.
This test reports all scheduling information changes made by this patch when compared with the version from #<!-- -->126703.
@<!-- -->Rin18 may be interested.
---
Patch is 2.95 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126707.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (+797-598)
- (modified) llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td (+43)
- (modified) llvm/lib/Target/AArch64/AArch64SchedPredicates.td (+31-3)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s (+388-388)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s (+50-50)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s (+405-405)
- (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s (+7591)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s (+3316-3316)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s (+900-898)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s (+3-3)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 368665467859f5f..99ca28bc4151dad 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -66,6 +66,11 @@ def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1,
def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units
def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units
def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units
+// Select V0 + V2 or V1 + V3 by issuing 2 micro operations
+def V1UnitSVE01 : ProcResGroup<[V1UnitV0, V1UnitV1, // FP/ASIMD 0,2/1,3 units
+ V1UnitV2, V1UnitV3]>;
+def V1UnitSVE0 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0,2 units
+def V1UnitSVE1 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1,3 units
// Define commonly used read types.
@@ -98,377 +103,487 @@ def V1Write_0c_0Z : SchedWriteRes<[]>;
def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; }
def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; }
-def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1; }
+def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1;
+ let NumMicroOps = 2; }
def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; }
+def V1Write_4c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_5c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 5;
+ let ReleaseAtCycles = [3]; }
+
def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; }
+def V1Write_6c2_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_7c4_1L : SchedWriteRes<[V1UnitL]> { let Latency = 7;
+ let ReleaseAtCycles = [4]; }
def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
-def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2; }
+def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; }
-def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; }
+def V1Write_4c6_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4;
+ let ReleaseAtCycles = [6]; }
def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; }
def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
+def V1Write_2c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2;
+ let ReleaseAtCycles = [2]; }
+def V1Write_3c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3;
+ let ReleaseAtCycles = [2]; }
def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; }
def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; }
-def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
- let ReleaseAtCycles = [5]; }
-def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
- let ReleaseAtCycles = [5]; }
+def V1Write_12c12_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def V1Write_20c20_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; }
+def V1Write_2c4_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
+def V1Write_4c2_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
+def V1Write_6c3_1V : SchedWriteRes<[V1UnitV]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_12c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+def V1Write_14c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 14;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+
def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; }
+def V1Write_2c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
+def V1Write_3c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 3;
+ let NumMicroOps = 2; }
def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
-def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
-def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
- let ReleaseAtCycles = [10]; }
-def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
- let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
-def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
- let ReleaseAtCycles = [7]; }
+def V1Write_4c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_5c4_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 5;
+ let ReleaseAtCycles = [4];
+ let NumMicroOps = 2; }
+def V1Write_6c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def V1Write_6c4_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4,4]; }
+def V1Write_10c18_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [18]; }
+def V1Write_11c20_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [20]; }
+def V1Write_12c22_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [22]; }
+def V1Write_13c24_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 13;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [24]; }
+def V1Write_15c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 15;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_16c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 16;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_19c36_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 19;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [36]; }
+def V1Write_20c40_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 20;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [40]; }
+
def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
+def V1Write_2c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
-def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
-def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Write_3c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def V1Write_4c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_4c2_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_4c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [5]; }
+def V1Write_8c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6]; }
+def V1Write_9c8_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 9;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8]; }
+def V1Write_12c8_1SVE01: SchedWriteRes<[V1UnitSVE01]> { let Latency = 12;
+ let ReleaseAtCycles = [8];
+ let NumMicroOps = 2; }
+def V1Write_13c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 13;
+ let ReleaseAtCycles = [12];
+ let NumMicroOps = 2; }
+def V1Write_11c10_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [10]; }
def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
+def V1Write_4c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c4_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 6;
+ let ReleaseAtCycles = [4]; }
+def V1Write_7c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
+ let ReleaseAtCycles = [2]; }
def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
let ReleaseAtCycles = [7]; }
-def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+def V1Write_9c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 9;
+ let ReleaseAtCycles = [2]; }
+def V1Write_10c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [3]; }
+def V1Write_10c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
let ReleaseAtCycles = [5]; }
-def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
- let ReleaseAtCycles = [11]; }
+def V1Write_10c9_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [9]; }
+def V1Write_13c13_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+ let ReleaseAtCycles = [13]; }
def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
+def V1Write_15c14_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
+ let ReleaseAtCycles = [14]; }
+def V1Write_16c8_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [8]; }
+def V1Write_16c15_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [15]; }
def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; }
-def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; }
-def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; }
+def V1Write_2c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def V1Write_3c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def V1Write_4c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_8c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+def V1Write_10c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
+def V1Write_4c2_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+
//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 11, NumMicroOps = 2 in
-def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
+def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 3 micro-op types
+def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
-let Latency = 7, NumMicroOps = 3 in
-def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 4 micro-op types
-
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0,
- V1UnitV0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 4 in
-def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 9, NumMicroOps = 4 in
-def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 11, NumMicroOps = 4 in
-def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 10, NumMicroOps = 4 in
-def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV, V1UnitV]>;
-let Latency = 2, NumMicroOps = 4 in
-def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 4 in
-def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/126707
More information about the llvm-commits
mailing list