[llvm] 4624668 - [AArch64] Update Neoverse V2 FSQRT execution units in schedule model. (#86803)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 2 02:47:55 PDT 2024
Author: Rin Dobrescu
Date: 2024-04-02T10:47:51+01:00
New Revision: 46246683a61a081d9d78cf987fd4f024556ecdc8
URL: https://github.com/llvm/llvm-project/commit/46246683a61a081d9d78cf987fd4f024556ecdc8
DIFF: https://github.com/llvm/llvm-project/commit/46246683a61a081d9d78cf987fd4f024556ecdc8.diff
LOG: [AArch64] Update Neoverse V2 FSQRT execution units in schedule model. (#86803)
This patch updates the Neoverse V2 schedule model so that the SVE FSQRT instructions can execute on either VX0 or VX2 (modelled as V2UnitV02) instead of only on V0 (V2UnitV0).
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index f10b94523d2e03..4d7f44e7b9b9ab 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1076,14 +1076,12 @@ def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let
def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
-def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
-def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
+def V2Write_16cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [14]; }
def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
-def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; }
// Miscellaneous
// -----------------------------------------------------------------------------
@@ -2567,13 +2565,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
+def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
+def : InstRW<[V2Write_16cyc_1V02_14rc], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric exponentiation
def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
index 4d6ce706b05274..acd35568249499 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
@@ -4278,9 +4278,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 0.25 fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 3 0.25 fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 3 0.25 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 16 7.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 13 6.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 10 4.50 fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 2 0.25 fsub z0.d, z1.d, z31.d
@@ -6861,7 +6861,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1554.25 1281.75 776.75 748.25
+# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1536.75 1281.75 794.25 748.25
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -7718,9 +7718,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - 7.00 - 7.00 - fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - 6.00 - 6.00 - fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - 4.50 - 4.50 - fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, z1.d, z31.d
More information about the llvm-commits mailing list