[llvm] cd80cd3 - [AArch64] Fix schedmodel zero latency moves for Neoverse V2
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 02:55:56 PDT 2023
Author: Ricardo Jesus
Date: 2023-09-07T10:54:38+01:00
New Revision: cd80cd3d14c6140e5383eb2b6157f1e85c6620aa
URL: https://github.com/llvm/llvm-project/commit/cd80cd3d14c6140e5383eb2b6157f1e85c6620aa
DIFF: https://github.com/llvm/llvm-project/commit/cd80cd3d14c6140e5383eb2b6157f1e85c6620aa.diff
LOG: [AArch64] Fix schedmodel zero latency moves for Neoverse V2
Model some register-to-register move operations and move immediate
operations as "zero latency moves", as described in the Software
Optimisation Guide (SOG), §4.12:
https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2/
Differential Revision: https://reviews.llvm.org/D159443
Added:
llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index b8e1dee705022c1..eca7700d5ff6ae9 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -88,6 +88,10 @@ def : WriteRes<WriteLDHi, []> { let Latency = 4; }
// Define customized scheduler read/write types specific to the Neoverse V2.
//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types
+def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
// Define generic 1 micro-op types
def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
@@ -900,6 +904,18 @@ def V2Write_StrHQ : SchedWriteVariant<[
SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>;
+def V2Write_0or1cyc_1I : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_1cyc_1I]>]>;
+
+def V2Write_0or2cyc_1V : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1V]>]>;
+
+def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+ SchedVar<NoSchedPred, [V2Write_3cyc_1M0]>]>;
+
def V2Write_2or3cyc_1M : SchedWriteVariant<[
SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
@@ -1092,9 +1108,9 @@ def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
// ALU, basic
// ALU, basic, flagset
-def : SchedAlias<WriteI, V2Write_1cyc_1I>;
-def : InstRW<[V2Write_1cyc_1F],
- (instregex "^(ADC|SBC)S[WX]r$")>;
+def : SchedAlias<WriteI, V2Write_1cyc_1I>;
+def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
@@ -1122,8 +1138,8 @@ def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
// Logical, shift, no flagset
-def : InstRW<[V2Write_1cyc_1I],
- (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[V2Write_1cyc_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
// Logical, shift, flagset
def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -1318,7 +1334,8 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
// FP transfer, from gen to low half of vec reg
-def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V2Write_0or3cyc_1M0],
+ (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
// FP transfer, from gen to high half of vec reg
def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1662,6 +1679,7 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>
+def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;
// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index a4c6cd4b978f459..97abec10f79429b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -54,3 +54,30 @@ def NeoverseMULIdiomPred : MCSchedPredicate< // <op> Rd, Rs, Rv, ZR
SMADDLrrr, UMADDLrrr,
SMSUBLrrr, UMSUBLrrr]>,
CheckIsReg3Zero]>>;
+
+def NeoverseZeroMove : MCSchedPredicate<
+ CheckAny<[
+ // MOV Wd, #0
+ // MOV Xd, #0
+ CheckAll<[CheckOpcode<[MOVZWi, MOVZXi]>,
+ CheckAll<[CheckImmOperand<1, 0>,
+ CheckImmOperand<2, 0>]>]>,
+ // MOV Wd, WZR
+ // MOV Xd, XZR
+ // MOV Wd, Wn
+ // MOV Xd, Xn
+ CheckAll<[CheckOpcode<[ORRWrs, ORRXrs]>,
+ CheckAll<[CheckIsReg1Zero,
+ CheckImmOperand<3, 0>]>]>,
+ // FMOV Hd, WZR
+ // FMOV Hd, XZR
+ // FMOV Sd, WZR
+ // FMOV Dd, XZR
+ CheckAll<[CheckOpcode<[FMOVWHr, FMOVXHr,
+ FMOVWSr, FMOVXDr]>,
+ CheckIsReg1Zero]>,
+ // MOVI Dd, #0
+ // MOVI Vd.2D, #0
+ CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
+ CheckImmOperand<1, 0>]>
+ ]>>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index c71a31c97c00a33..20a38a55c1be1db 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -2532,14 +2532,14 @@ drps
# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
-# CHECK-NEXT: 1 1 0.17 mov x3, x6
-# CHECK-NEXT: 1 1 0.17 mov x3, xzr
-# CHECK-NEXT: 1 1 0.17 mov wzr, w2
-# CHECK-NEXT: 1 1 0.17 mov w3, w5
+# CHECK-NEXT: 1 0 0.06 mov x3, x6
+# CHECK-NEXT: 1 0 0.06 mov x3, xzr
+# CHECK-NEXT: 1 0 0.06 mov wzr, w2
+# CHECK-NEXT: 1 0 0.06 mov w3, w5
# CHECK-NEXT: 1 1 0.17 movz w2, #0, lsl #16
# CHECK-NEXT: 1 1 0.17 mov w2, #-1235
# CHECK-NEXT: 1 1 0.17 mov x2, #5299989643264
-# CHECK-NEXT: 1 1 0.17 mov x2, #0
+# CHECK-NEXT: 1 0 0.06 mov x2, #0
# CHECK-NEXT: 1 1 0.17 movk w3, #0
# CHECK-NEXT: 1 1 0.17 movz x4, #0, lsl #16
# CHECK-NEXT: 1 1 0.17 movk w5, #0, lsl #16
@@ -2585,7 +2585,7 @@ drps
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.00 165.00 165.00 327.42 182.42 110.42 110.42 92.67 92.67 190.00 146.00 30.00 10.00
+# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.00 165.00 165.00 326.58 181.58 109.58 109.58 91.83 91.83 190.00 146.00 30.00 10.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -3732,14 +3732,14 @@ drps
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - bics x3, xzr, x3, lsl #1
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - tst w3, w7, lsl #31
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - tst x2, x20, asr #2
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x3, x6
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x3, xzr
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov wzr, w2
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x3, x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x3, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov wzr, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w3, w5
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movz w2, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov w2, #-1235
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x2, #5299989643264
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x2, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x2, #0
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movk w3, #0
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movz x4, #0, lsl #16
# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movk w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
new file mode 100644
index 000000000000000..aecd89093900faa
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
@@ -0,0 +1,75 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -instruction-tables < %s | FileCheck %s
+
+mov x1, #0
+mov x1, xzr
+mov w1, #0
+mov w1, wzr
+fmov h1, wzr
+fmov h1, xzr
+fmov s1, wzr
+fmov d1, xzr
+movi d1, #0
+movi v1.2d, #0
+mov w1, w2
+mov x1, x2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.06 mov x1, #0
+# CHECK-NEXT: 1 0 0.06 mov x1, xzr
+# CHECK-NEXT: 1 0 0.06 mov w1, #0
+# CHECK-NEXT: 1 0 0.06 mov w1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov h1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov h1, xzr
+# CHECK-NEXT: 1 0 0.06 fmov s1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov d1, xzr
+# CHECK-NEXT: 1 0 0.06 movi d1, #0000000000000000
+# CHECK-NEXT: 1 0 0.06 movi v1.2d, #0000000000000000
+# CHECK-NEXT: 1 0 0.06 mov w1, w2
+# CHECK-NEXT: 1 0 0.06 mov x1, x2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V2UnitB
+# CHECK-NEXT: [0.1] - V2UnitB
+# CHECK-NEXT: [1.0] - V2UnitD
+# CHECK-NEXT: [1.1] - V2UnitD
+# CHECK-NEXT: [2] - V2UnitL2
+# CHECK-NEXT: [3.0] - V2UnitL01
+# CHECK-NEXT: [3.1] - V2UnitL01
+# CHECK-NEXT: [4] - V2UnitM0
+# CHECK-NEXT: [5] - V2UnitM1
+# CHECK-NEXT: [6] - V2UnitS0
+# CHECK-NEXT: [7] - V2UnitS1
+# CHECK-NEXT: [8] - V2UnitS2
+# CHECK-NEXT: [9] - V2UnitS3
+# CHECK-NEXT: [10] - V2UnitV0
+# CHECK-NEXT: [11] - V2UnitV1
+# CHECK-NEXT: [12] - V2UnitV2
+# CHECK-NEXT: [13] - V2UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov h1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov h1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov s1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov d1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - movi d1, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - movi v1.2d, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, x2
More information about the llvm-commits
mailing list