[llvm] cd80cd3 - [AArch64] Fix schedmodel zero latency moves for Neoverse V2

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 7 02:55:56 PDT 2023


Author: Ricardo Jesus
Date: 2023-09-07T10:54:38+01:00
New Revision: cd80cd3d14c6140e5383eb2b6157f1e85c6620aa

URL: https://github.com/llvm/llvm-project/commit/cd80cd3d14c6140e5383eb2b6157f1e85c6620aa
DIFF: https://github.com/llvm/llvm-project/commit/cd80cd3d14c6140e5383eb2b6157f1e85c6620aa.diff

LOG: [AArch64] Fix schedmodel zero latency moves for Neoverse V2

Model some register-to-register move operations and move immediate
operations as "zero latency moves", as described in the Software
Optimisation Guide (SOG), §4.12:

https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2/

Differential Revision: https://reviews.llvm.org/D159443

Added: 
    llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s

Modified: 
    llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
    llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
    llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index b8e1dee705022c1..eca7700d5ff6ae9 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -88,6 +88,10 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 // Define customized scheduler read/write types specific to the Neoverse V2.
 
 //===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types
+def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
 // Define generic 1 micro-op types
 
 def V2Write_1cyc_1B    : SchedWriteRes<[V2UnitB]>   { let Latency = 1; }
@@ -900,6 +904,18 @@ def V2Write_StrHQ : SchedWriteVariant<[
                       SchedVar<NeoverseHQForm,  [V2Write_2cyc_1L01_1V01_1I]>,
                       SchedVar<NoSchedPred,     [V2Write_2cyc_1L01_1V01]>]>;
 
+def V2Write_0or1cyc_1I : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                      SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>]>;
+
+def V2Write_0or2cyc_1V : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                      SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>]>;
+
+def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                      SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>]>;
+
 def V2Write_2or3cyc_1M : SchedWriteVariant<[
                       SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M]>,
                       SchedVar<NoSchedPred,     [V2Write_2cyc_1M]>]>;
@@ -1092,9 +1108,9 @@ def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
 
 // ALU, basic
 // ALU, basic, flagset
-def : SchedAlias<WriteI,     V2Write_1cyc_1I>;
-def : InstRW<[V2Write_1cyc_1F],
-             (instregex "^(ADC|SBC)S[WX]r$")>;
+def : SchedAlias<WriteI, V2Write_1cyc_1I>;
+def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
 
 // ALU, extend and shift
 def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
@@ -1122,8 +1138,8 @@ def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
 def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
 
 // Logical, shift, no flagset
-def : InstRW<[V2Write_1cyc_1I],
-             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[V2Write_1cyc_1I],    (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
 
 // Logical, shift, flagset
 def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -1318,7 +1334,8 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
 def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V2Write_0or3cyc_1M0],
+             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1662,6 +1679,7 @@ def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
 // ASIMD transpose
 // ASIMD unzip/zip
 // Handled by SchedAlias<WriteV[dq], ...>
+def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;
 
 // ASIMD duplicate, gen reg
 def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;

diff  --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index a4c6cd4b978f459..97abec10f79429b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -54,3 +54,30 @@ def NeoverseMULIdiomPred : MCSchedPredicate< // <op> Rd, Rs, Rv, ZR
                                           SMADDLrrr, UMADDLrrr,
                                           SMSUBLrrr, UMSUBLrrr]>,
                                        CheckIsReg3Zero]>>;
+
+def NeoverseZeroMove : MCSchedPredicate<
+                         CheckAny<[
+                           // MOV Wd, #0
+                           // MOV Xd, #0
+                           CheckAll<[CheckOpcode<[MOVZWi, MOVZXi]>,
+                                     CheckAll<[CheckImmOperand<1, 0>,
+                                               CheckImmOperand<2, 0>]>]>,
+                           // MOV Wd, WZR
+                           // MOV Xd, XZR
+                           // MOV Wd, Wn
+                           // MOV Xd, Xn
+                           CheckAll<[CheckOpcode<[ORRWrs, ORRXrs]>,
+                                     CheckAll<[CheckIsReg1Zero,
+                                               CheckImmOperand<3, 0>]>]>,
+                           // FMOV Hd, WZR
+                           // FMOV Hd, XZR
+                           // FMOV Sd, WZR
+                           // FMOV Dd, XZR
+                           CheckAll<[CheckOpcode<[FMOVWHr, FMOVXHr,
+                                                  FMOVWSr, FMOVXDr]>,
+                                     CheckIsReg1Zero]>,
+                           // MOVI Dd, #0
+                           // MOVI Vd.2D, #0
+                           CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
+                                     CheckImmOperand<1, 0>]>
+                         ]>>;

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index c71a31c97c00a33..20a38a55c1be1db 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -2532,14 +2532,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      1     0.17                        mov	x3, x6
-# CHECK-NEXT:  1      1     0.17                        mov	x3, xzr
-# CHECK-NEXT:  1      1     0.17                        mov	wzr, w2
-# CHECK-NEXT:  1      1     0.17                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.06                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.06                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.06                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.06                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.17                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.17                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      1     0.17                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.06                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.17                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.17                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        movk	w5, #0, lsl #16
@@ -2585,7 +2585,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT: 11.00  11.00  33.00  33.00  99.00  165.00 165.00 327.42 182.42 110.42 110.42 92.67  92.67  190.00 146.00 30.00  10.00
+# CHECK-NEXT: 11.00  11.00  33.00  33.00  99.00  165.00 165.00 326.58 181.58 109.58 109.58 91.83  91.83  190.00 146.00 30.00  10.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
@@ -3732,14 +3732,14 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -     bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -     tst	w3, w7, lsl #31
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -     tst	x2, x20, asr #2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	x3, x6
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	x3, xzr
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	wzr, w2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	w3, w5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, x6
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	wzr, w2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w3, w5
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     movz	w2, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	w2, #-1235
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	x2, #5299989643264
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     mov	x2, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x2, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     movk	w3, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     movz	x4, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.17   0.17   0.17   0.17   0.17   0.17    -      -      -      -     movk	w5, #0, lsl #16

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
new file mode 100644
index 000000000000000..aecd89093900faa
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
@@ -0,0 +1,75 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -instruction-tables < %s | FileCheck %s
+
+mov  x1, #0
+mov  x1, xzr
+mov  w1, #0
+mov  w1, wzr
+fmov h1, wzr
+fmov h1, xzr
+fmov s1, wzr
+fmov d1, xzr
+movi d1, #0
+movi v1.2d, #0
+mov  w1, w2
+mov  x1, x2
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      0     0.06                        mov	x1, #0
+# CHECK-NEXT:  1      0     0.06                        mov	x1, xzr
+# CHECK-NEXT:  1      0     0.06                        mov	w1, #0
+# CHECK-NEXT:  1      0     0.06                        mov	w1, wzr
+# CHECK-NEXT:  1      0     0.06                        fmov	h1, wzr
+# CHECK-NEXT:  1      0     0.06                        fmov	h1, xzr
+# CHECK-NEXT:  1      0     0.06                        fmov	s1, wzr
+# CHECK-NEXT:  1      0     0.06                        fmov	d1, xzr
+# CHECK-NEXT:  1      0     0.06                        movi	d1, #0000000000000000
+# CHECK-NEXT:  1      0     0.06                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  1      0     0.06                        mov	w1, w2
+# CHECK-NEXT:  1      0     0.06                        mov	x1, x2
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - V2UnitB
+# CHECK-NEXT: [0.1] - V2UnitB
+# CHECK-NEXT: [1.0] - V2UnitD
+# CHECK-NEXT: [1.1] - V2UnitD
+# CHECK-NEXT: [2]   - V2UnitL2
+# CHECK-NEXT: [3.0] - V2UnitL01
+# CHECK-NEXT: [3.1] - V2UnitL01
+# CHECK-NEXT: [4]   - V2UnitM0
+# CHECK-NEXT: [5]   - V2UnitM1
+# CHECK-NEXT: [6]   - V2UnitS0
+# CHECK-NEXT: [7]   - V2UnitS1
+# CHECK-NEXT: [8]   - V2UnitS2
+# CHECK-NEXT: [9]   - V2UnitS3
+# CHECK-NEXT: [10]  - V2UnitV0
+# CHECK-NEXT: [11]  - V2UnitV1
+# CHECK-NEXT: [12]  - V2UnitV2
+# CHECK-NEXT: [13]  - V2UnitV3
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x1, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x1, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w1, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w1, wzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	h1, wzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	h1, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	s1, wzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	d1, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movi	d1, #0000000000000000
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w1, w2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x1, x2


        


More information about the llvm-commits mailing list