[llvm] [AArch64] Neoverse V1 scheduling info (PR #126707)
Julien Villette via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 02:15:11 PST 2025
================
@@ -98,377 +103,487 @@ def V1Write_0c_0Z : SchedWriteRes<[]>;
def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; }
def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; }
-def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1; }
+def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1;
+ let NumMicroOps = 2; }
def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; }
+def V1Write_4c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_5c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 5;
+ let ReleaseAtCycles = [3]; }
+
def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; }
+def V1Write_6c2_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_7c4_1L : SchedWriteRes<[V1UnitL]> { let Latency = 7;
+ let ReleaseAtCycles = [4]; }
def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
-def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2; }
+def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; }
-def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; }
+def V1Write_4c6_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4;
+ let ReleaseAtCycles = [6]; }
def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; }
def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
+def V1Write_2c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2;
+ let ReleaseAtCycles = [2]; }
+def V1Write_3c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3;
+ let ReleaseAtCycles = [2]; }
def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; }
def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; }
-def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
- let ReleaseAtCycles = [5]; }
-def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
- let ReleaseAtCycles = [5]; }
+def V1Write_12c12_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def V1Write_20c20_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; }
+def V1Write_2c4_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
+def V1Write_4c2_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
+def V1Write_6c3_1V : SchedWriteRes<[V1UnitV]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_12c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+def V1Write_14c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 14;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+
def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; }
+def V1Write_2c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
+def V1Write_3c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 3;
+ let NumMicroOps = 2; }
def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
-def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
-def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
- let ReleaseAtCycles = [10]; }
-def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
- let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
-def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
- let ReleaseAtCycles = [7]; }
+def V1Write_4c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_5c4_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 5;
+ let ReleaseAtCycles = [4];
+ let NumMicroOps = 2; }
+def V1Write_6c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def V1Write_6c4_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4,4]; }
+def V1Write_10c18_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [18]; }
+def V1Write_11c20_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [20]; }
+def V1Write_12c22_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [22]; }
+def V1Write_13c24_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 13;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [24]; }
+def V1Write_15c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 15;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_16c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 16;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_19c36_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 19;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [36]; }
+def V1Write_20c40_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 20;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [40]; }
+
def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
+def V1Write_2c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
-def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
-def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Write_3c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def V1Write_4c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_4c2_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_4c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [5]; }
+def V1Write_8c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6]; }
+def V1Write_9c8_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 9;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8]; }
+def V1Write_12c8_1SVE01: SchedWriteRes<[V1UnitSVE01]> { let Latency = 12;
+ let ReleaseAtCycles = [8];
+ let NumMicroOps = 2; }
+def V1Write_13c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 13;
+ let ReleaseAtCycles = [12];
+ let NumMicroOps = 2; }
+def V1Write_11c10_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [10]; }
def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
+def V1Write_4c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c4_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 6;
+ let ReleaseAtCycles = [4]; }
+def V1Write_7c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
+ let ReleaseAtCycles = [2]; }
def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
let ReleaseAtCycles = [7]; }
-def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+def V1Write_9c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 9;
+ let ReleaseAtCycles = [2]; }
+def V1Write_10c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [3]; }
+def V1Write_10c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
let ReleaseAtCycles = [5]; }
-def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
- let ReleaseAtCycles = [11]; }
+def V1Write_10c9_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [9]; }
+def V1Write_13c13_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+ let ReleaseAtCycles = [13]; }
def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
+def V1Write_15c14_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
+ let ReleaseAtCycles = [14]; }
+def V1Write_16c8_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [8]; }
----------------
jvillette38 wrote:
This one is used for FSQRTDr. Again, I used the worst-case throughput.
`FP square root, D-form | FSQRT | 7 to 16 | 4/15 to 4/7 | V02`
So the worst-case throughput is 4/15. This instruction can be issued on V0 or V2, so the throughput per pipeline is (4/15)/2 = 2/15.
The number of cycles the micro-op should occupy the pipeline is therefore 15/2 = 7.5, which gives the 8 used in ReleaseAtCycles.
It was computed with a script to generate references. I agree that it would be better to also consider the best case plus 1/3 of the gap between the best and worst cases. And probably, after benchmarking, only the best case...
I can skip this kind of change in new versions of the patch.
Sorry.
https://github.com/llvm/llvm-project/pull/126707
More information about the llvm-commits
mailing list