[llvm] [AArch64] Fix scheduling information for arithmetic and logical instructions. (PR #113542)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 03:24:49 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Rin Dobrescu (Rin18)
<details>
<summary>Changes</summary>
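This patch corrects the scheduling information for Neoverse V2 arithmetic and logical instructions. It introduces a `V2UnitG` resource group (pipelines S0, S1 and M0) with a matching `V2Write_1c_1G` write, which replaces `V2Write_1c_1F` for flag-setting ALU instructions and conditional compares and is now also used for `SUBPS`.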
---
Patch is 68.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113542.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (+16-9)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s (+189-189)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index f884d20ab7eda9..054fd2a382e720 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -56,6 +56,7 @@ def V2UnitD : ProcResource<2>; // Store data 0/1
def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1
def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3
def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1 and single/multicycle 0/1
+def V2UnitG : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0]>; // Integer single-cycle 0/1 and single/multicycle 0
def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>; // Integer single/multicycle 0/1
def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
@@ -97,6 +98,7 @@ def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
def V2Write_1c_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
def V2Write_1c_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; }
+def V2Write_1c_1G : SchedWriteRes<[V2UnitG]> { let Latency = 1; }
def V2Write_1c_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; }
def V2Write_1c_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; }
def V2Write_1c_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; }
@@ -886,11 +888,11 @@ def V2Write_ArithI : SchedWriteVariant<[
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
def V2Write_ArithF : SchedWriteVariant<[
- SchedVar<IsCheapLSL, [V2Write_1c_1F]>,
+ SchedVar<IsCheapLSL, [V2Write_1c_1G]>,
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
def V2Write_Logical : SchedWriteVariant<[
- SchedVar<NeoverseNoLSL, [V2Write_1c_1F]>,
+ SchedVar<NeoverseNoLSL, [V2Write_1c_1G]>,
SchedVar<NoSchedPred, [V2Write_2c_1M]>]>;
def V2Write_Extr : SchedWriteVariant<[
@@ -1106,19 +1108,19 @@ def : InstRW<[V2Write_1c_1B_1R], (instrs BL, BLR)>;
// -----------------------------------------------------------------------------
// ALU, basic
-// ALU, basic, flagset
def : SchedAlias<WriteI, V2Write_1c_1I>;
-def : InstRW<[V2Write_1c_1F], (instregex "^(ADD|SUB)S[WX]r[ir]$",
+
+// ALU, basic, flagset
+def : InstRW<[V2Write_1c_1G],
+ (instregex "^(ADD|SUB)S[WX]r[ir]$",
"^(ADC|SBC)S[WX]r$",
- "^ANDS[WX]ri$")>;
+ "^ANDS[WX]ri$",
+ "^(AND|BIC)S[WX]rr$")>;
def : InstRW<[V2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2c_1M>;
-// Conditional compare
-def : InstRW<[V2Write_1c_1F], (instregex "^CCM[NP][WX][ir]")>;
-
// Arithmetic, LSL shift, shift <= 4
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
@@ -1129,6 +1131,9 @@ def : InstRW<[V2Write_ArithF],
// Arithmetic, immediate to logical address tag
def : InstRW<[V2Write_2c_1M], (instrs ADDG, SUBG)>;
+// Conditional compare
+def : InstRW<[V2Write_1c_1G], (instregex "^CCM[NP][WX][ir]")>;
+
// Convert floating-point condition flags
// Flag manipulation instructions
def : WriteRes<WriteSys, []> { let Latency = 1; }
@@ -1138,8 +1143,10 @@ def : InstRW<[V2Write_2c_1M], (instrs IRG, IRGstack)>;
// Insert Tag Mask
// Subtract Pointer
+def : InstRW<[V2Write_1c_1I], (instrs GMI, SUBP)>;
+
// Subtract Pointer, flagset
-def : InstRW<[V2Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;
+def : InstRW<[V2Write_1c_1G], (instrs SUBPS)>;
// Logical, shift, no flagset
def : InstRW<[V2Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index 9a3bcac69582ac..987a41d8b08eed 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -1406,16 +1406,16 @@ drps
# CHECK-NEXT: 1 1 0.17 sub w4, w20, #546, lsl #12
# CHECK-NEXT: 1 1 0.17 sub sp, sp, #288
# CHECK-NEXT: 1 1 0.17 sub wsp, w19, #16
-# CHECK-NEXT: 1 1 0.25 adds w13, w23, #291, lsl #12
-# CHECK-NEXT: 1 1 0.25 cmn w2, #4095
-# CHECK-NEXT: 1 1 0.25 adds w20, wsp, #0
-# CHECK-NEXT: 1 1 0.25 cmn x3, #1, lsl #12
-# CHECK-NEXT: 1 1 0.25 cmp sp, #20, lsl #12
-# CHECK-NEXT: 1 1 0.25 cmp x30, #4095
-# CHECK-NEXT: 1 1 0.25 subs x4, sp, #3822
-# CHECK-NEXT: 1 1 0.25 cmn w3, #291, lsl #12
-# CHECK-NEXT: 1 1 0.25 cmn wsp, #1365
-# CHECK-NEXT: 1 1 0.25 cmn sp, #1092, lsl #12
+# CHECK-NEXT: 1 1 0.33 adds w13, w23, #291, lsl #12
+# CHECK-NEXT: 1 1 0.33 cmn w2, #4095
+# CHECK-NEXT: 1 1 0.33 adds w20, wsp, #0
+# CHECK-NEXT: 1 1 0.33 cmn x3, #1, lsl #12
+# CHECK-NEXT: 1 1 0.33 cmp sp, #20, lsl #12
+# CHECK-NEXT: 1 1 0.33 cmp x30, #4095
+# CHECK-NEXT: 1 1 0.33 subs x4, sp, #3822
+# CHECK-NEXT: 1 1 0.33 cmn w3, #291, lsl #12
+# CHECK-NEXT: 1 1 0.33 cmn wsp, #1365
+# CHECK-NEXT: 1 1 0.33 cmn sp, #1092, lsl #12
# CHECK-NEXT: 1 1 0.17 mov sp, x30
# CHECK-NEXT: 1 1 0.17 mov wsp, w20
# CHECK-NEXT: 1 1 0.17 mov x11, sp
@@ -1446,11 +1446,11 @@ drps
# CHECK-NEXT: 1 2 0.50 add x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.25 adds w3, w5, w7
-# CHECK-NEXT: 1 1 0.25 cmn w3, w5
-# CHECK-NEXT: 1 1 0.25 adds w20, wzr, w4
-# CHECK-NEXT: 1 1 0.25 adds w4, w6, wzr
-# CHECK-NEXT: 1 1 0.25 adds w11, w13, w15
+# CHECK-NEXT: 1 1 0.33 adds w3, w5, w7
+# CHECK-NEXT: 1 1 0.33 cmn w3, w5
+# CHECK-NEXT: 1 1 0.33 adds w20, wzr, w4
+# CHECK-NEXT: 1 1 0.33 adds w4, w6, wzr
+# CHECK-NEXT: 1 1 0.33 adds w11, w13, w15
# CHECK-NEXT: 1 2 0.50 adds w9, w3, wzr, lsl #10
# CHECK-NEXT: 1 2 0.50 adds w17, w29, w20, lsl #31
# CHECK-NEXT: 1 2 0.50 adds w21, w22, w23, lsr #0
@@ -1459,11 +1459,11 @@ drps
# CHECK-NEXT: 1 2 0.50 adds w2, w3, w4, asr #0
# CHECK-NEXT: 1 2 0.50 adds w5, w6, w7, asr #21
# CHECK-NEXT: 1 2 0.50 adds w8, w9, w10, asr #31
-# CHECK-NEXT: 1 1 0.25 adds x3, x5, x7
-# CHECK-NEXT: 1 1 0.25 cmn x3, x5
-# CHECK-NEXT: 1 1 0.25 adds x20, xzr, x4
-# CHECK-NEXT: 1 1 0.25 adds x4, x6, xzr
-# CHECK-NEXT: 1 1 0.25 adds x11, x13, x15
+# CHECK-NEXT: 1 1 0.33 adds x3, x5, x7
+# CHECK-NEXT: 1 1 0.33 cmn x3, x5
+# CHECK-NEXT: 1 1 0.33 adds x20, xzr, x4
+# CHECK-NEXT: 1 1 0.33 adds x4, x6, xzr
+# CHECK-NEXT: 1 1 0.33 adds x11, x13, x15
# CHECK-NEXT: 1 2 0.50 adds x9, x3, xzr, lsl #10
# CHECK-NEXT: 1 2 0.50 adds x17, x29, x20, lsl #63
# CHECK-NEXT: 1 2 0.50 adds x21, x22, x23, lsr #0
@@ -1496,10 +1496,10 @@ drps
# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.25 subs w3, w5, w7
-# CHECK-NEXT: 1 1 0.25 cmp w3, w5
-# CHECK-NEXT: 1 1 0.25 subs w4, w6, wzr
-# CHECK-NEXT: 1 1 0.25 subs w11, w13, w15
+# CHECK-NEXT: 1 1 0.33 subs w3, w5, w7
+# CHECK-NEXT: 1 1 0.33 cmp w3, w5
+# CHECK-NEXT: 1 1 0.33 subs w4, w6, wzr
+# CHECK-NEXT: 1 1 0.33 subs w11, w13, w15
# CHECK-NEXT: 1 2 0.50 subs w9, w3, wzr, lsl #10
# CHECK-NEXT: 1 2 0.50 subs w17, w29, w20, lsl #31
# CHECK-NEXT: 1 2 0.50 subs w21, w22, w23, lsr #0
@@ -1508,10 +1508,10 @@ drps
# CHECK-NEXT: 1 2 0.50 subs w2, w3, w4, asr #0
# CHECK-NEXT: 1 2 0.50 subs w5, w6, w7, asr #21
# CHECK-NEXT: 1 2 0.50 subs w8, w9, w10, asr #31
-# CHECK-NEXT: 1 1 0.25 subs x3, x5, x7
-# CHECK-NEXT: 1 1 0.25 cmp x3, x5
-# CHECK-NEXT: 1 1 0.25 subs x4, x6, xzr
-# CHECK-NEXT: 1 1 0.25 subs x11, x13, x15
+# CHECK-NEXT: 1 1 0.33 subs x3, x5, x7
+# CHECK-NEXT: 1 1 0.33 cmp x3, x5
+# CHECK-NEXT: 1 1 0.33 subs x4, x6, xzr
+# CHECK-NEXT: 1 1 0.33 subs x11, x13, x15
# CHECK-NEXT: 1 2 0.50 subs x9, x3, xzr, lsl #10
# CHECK-NEXT: 1 2 0.50 subs x17, x29, x20, lsl #63
# CHECK-NEXT: 1 2 0.50 subs x21, x22, x23, lsr #0
@@ -1520,9 +1520,9 @@ drps
# CHECK-NEXT: 1 2 0.50 subs x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 subs x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 subs x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.25 cmn wzr, w4
-# CHECK-NEXT: 1 1 0.25 cmn w5, wzr
-# CHECK-NEXT: 1 1 0.25 cmn w6, w7
+# CHECK-NEXT: 1 1 0.33 cmn wzr, w4
+# CHECK-NEXT: 1 1 0.33 cmn w5, wzr
+# CHECK-NEXT: 1 1 0.33 cmn w6, w7
# CHECK-NEXT: 1 2 0.50 cmn w8, w9, lsl #15
# CHECK-NEXT: 1 2 0.50 cmn w10, w11, lsl #31
# CHECK-NEXT: 1 2 0.50 cmn w12, w13, lsr #0
@@ -1531,10 +1531,10 @@ drps
# CHECK-NEXT: 1 2 0.50 cmn w18, w19, asr #0
# CHECK-NEXT: 1 2 0.50 cmn w20, w21, asr #22
# CHECK-NEXT: 1 2 0.50 cmn w22, w23, asr #31
-# CHECK-NEXT: 1 1 0.25 cmn x0, x3
-# CHECK-NEXT: 1 1 0.25 cmn xzr, x4
-# CHECK-NEXT: 1 1 0.25 cmn x5, xzr
-# CHECK-NEXT: 1 1 0.25 cmn x6, x7
+# CHECK-NEXT: 1 1 0.33 cmn x0, x3
+# CHECK-NEXT: 1 1 0.33 cmn xzr, x4
+# CHECK-NEXT: 1 1 0.33 cmn x5, xzr
+# CHECK-NEXT: 1 1 0.33 cmn x6, x7
# CHECK-NEXT: 1 2 0.50 cmn x8, x9, lsl #15
# CHECK-NEXT: 1 2 0.50 cmn x10, x11, lsl #63
# CHECK-NEXT: 1 2 0.50 cmn x12, x13, lsr #0
@@ -1543,10 +1543,10 @@ drps
# CHECK-NEXT: 1 2 0.50 cmn x18, x19, asr #0
# CHECK-NEXT: 1 2 0.50 cmn x20, x21, asr #55
# CHECK-NEXT: 1 2 0.50 cmn x22, x23, asr #63
-# CHECK-NEXT: 1 1 0.25 cmp w0, w3
-# CHECK-NEXT: 1 1 0.25 cmp wzr, w4
-# CHECK-NEXT: 1 1 0.25 cmp w5, wzr
-# CHECK-NEXT: 1 1 0.25 cmp w6, w7
+# CHECK-NEXT: 1 1 0.33 cmp w0, w3
+# CHECK-NEXT: 1 1 0.33 cmp wzr, w4
+# CHECK-NEXT: 1 1 0.33 cmp w5, wzr
+# CHECK-NEXT: 1 1 0.33 cmp w6, w7
# CHECK-NEXT: 1 2 0.50 cmp w8, w9, lsl #15
# CHECK-NEXT: 1 2 0.50 cmp w10, w11, lsl #31
# CHECK-NEXT: 1 2 0.50 cmp w12, w13, lsr #0
@@ -1554,10 +1554,10 @@ drps
# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0
# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22
# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31
-# CHECK-NEXT: 1 1 0.25 cmp x0, x3
-# CHECK-NEXT: 1 1 0.25 cmp xzr, x4
-# CHECK-NEXT: 1 1 0.25 cmp x5, xzr
-# CHECK-NEXT: 1 1 0.25 cmp x6, x7
+# CHECK-NEXT: 1 1 0.33 cmp x0, x3
+# CHECK-NEXT: 1 1 0.33 cmp xzr, x4
+# CHECK-NEXT: 1 1 0.33 cmp x5, xzr
+# CHECK-NEXT: 1 1 0.33 cmp x6, x7
# CHECK-NEXT: 1 2 0.50 cmp x8, x9, lsl #15
# CHECK-NEXT: 1 2 0.50 cmp x10, x11, lsl #63
# CHECK-NEXT: 1 2 0.50 cmp x12, x13, lsr #0
@@ -1566,8 +1566,8 @@ drps
# CHECK-NEXT: 1 2 0.50 cmp x18, x19, asr #0
# CHECK-NEXT: 1 2 0.50 cmp x20, x21, asr #55
# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63
-# CHECK-NEXT: 1 1 0.25 cmp wzr, w0
-# CHECK-NEXT: 1 1 0.25 cmp xzr, x0
+# CHECK-NEXT: 1 1 0.33 cmp wzr, w0
+# CHECK-NEXT: 1 1 0.33 cmp xzr, x0
# CHECK-NEXT: 1 1 0.17 adc w29, w27, w25
# CHECK-NEXT: 1 1 0.17 adc wzr, w3, w4
# CHECK-NEXT: 1 1 0.17 adc w9, wzr, w10
@@ -1576,14 +1576,14 @@ drps
# CHECK-NEXT: 1 1 0.17 adc xzr, x3, x4
# CHECK-NEXT: 1 1 0.17 adc x9, xzr, x10
# CHECK-NEXT: 1 1 0.17 adc x20, x0, xzr
-# CHECK-NEXT: 1 1 0.25 adcs w29, w27, w25
-# CHECK-NEXT: 1 1 0.25 adcs wzr, w3, w4
-# CHECK-NEXT: 1 1 0.25 adcs w9, wzr, w10
-# CHECK-NEXT: 1 1 0.25 adcs w20, w0, wzr
-# CHECK-NEXT: 1 1 0.25 adcs x29, x27, x25
-# CHECK-NEXT: 1 1 0.25 adcs xzr, x3, x4
-# CHECK-NEXT: 1 1 0.25 adcs x9, xzr, x10
-# CHECK-NEXT: 1 1 0.25 adcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.33 adcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.33 adcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.33 adcs w9, wzr, w10
+# CHECK-NEXT: 1 1 0.33 adcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.33 adcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.33 adcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.33 adcs x9, xzr, x10
+# CHECK-NEXT: 1 1 0.33 adcs x20, x0, xzr
# CHECK-NEXT: 1 1 0.17 sbc w29, w27, w25
# CHECK-NEXT: 1 1 0.17 sbc wzr, w3, w4
# CHECK-NEXT: 1 1 0.17 ngc w9, w10
@@ -1592,26 +1592,26 @@ drps
# CHECK-NEXT: 1 1 0.17 sbc xzr, x3, x4
# CHECK-NEXT: 1 1 0.17 ngc x9, x10
# CHECK-NEXT: 1 1 0.17 sbc x20, x0, xzr
-# CHECK-NEXT: 1 1 0.25 sbcs w29, w27, w25
-# CHECK-NEXT: 1 1 0.25 sbcs wzr, w3, w4
-# CHECK-NEXT: 1 1 0.25 ngcs w9, w10
-# CHECK-NEXT: 1 1 0.25 sbcs w20, w0, wzr
-# CHECK-NEXT: 1 1 0.25 sbcs x29, x27, x25
-# CHECK-NEXT: 1 1 0.25 sbcs xzr, x3, x4
-# CHECK-NEXT: 1 1 0.25 ngcs x9, x10
-# CHECK-NEXT: 1 1 0.25 sbcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.33 sbcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.33 sbcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.33 ngcs w9, w10
+# CHECK-NEXT: 1 1 0.33 sbcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.33 sbcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.33 sbcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.33 ngcs x9, x10
+# CHECK-NEXT: 1 1 0.33 sbcs x20, x0, xzr
# CHECK-NEXT: 1 1 0.17 ngc w3, w12
# CHECK-NEXT: 1 1 0.17 ngc wzr, w9
# CHECK-NEXT: 1 1 0.17 ngc w23, wzr
# CHECK-NEXT: 1 1 0.17 ngc x29, x30
# CHECK-NEXT: 1 1 0.17 ngc xzr, x0
# CHECK-NEXT: 1 1 0.17 ngc x0, xzr
-# CHECK-NEXT: 1 1 0.25 ngcs w3, w12
-# CHECK-NEXT: 1 1 0.25 ngcs wzr, w9
-# CHECK-NEXT: 1 1 0.25 ngcs w23, wzr
-# CHECK-NEXT: 1 1 0.25 ngcs x29, x30
-# CHECK-NEXT: 1 1 0.25 ngcs xzr, x0
-# CHECK-NEXT: 1 1 0.25 ngcs x0, xzr
+# CHECK-NEXT: 1 1 0.33 ngcs w3, w12
+# CHECK-NEXT: 1 1 0.33 ngcs wzr, w9
+# CHECK-NEXT: 1 1 0.33 ngcs w23, wzr
+# CHECK-NEXT: 1 1 0.33 ngcs x29, x30
+# CHECK-NEXT: 1 1 0.33 ngcs xzr, x0
+# CHECK-NEXT: 1 1 0.33 ngcs x0, xzr
# CHECK-NEXT: 1 1 0.17 sbfx x1, x2, #3, #2
# CHECK-NEXT: 1 1 0.17 asr x3, x4, #63
# CHECK-NEXT: 1 1 0.17 asr wzr, wzr, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113542
More information about the llvm-commits mailing list