[llvm] [AArch64] Update Neoverse V2 FSQRT execution units in schedule model. (PR #86803)
Rin Dobrescu via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 06:49:12 PDT 2024
https://github.com/Rin18 created https://github.com/llvm/llvm-project/pull/86803
This patch updates the Neoverse V2 schedule model so that the SVE FSQRT instructions can execute on either the V0 or the V2 pipeline (V2UnitV02) instead of on V0 only.
From 32407630da59bd4d5fadbebde6e17ce1eb2e1b9a Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Wed, 27 Mar 2024 12:42:11 +0000
Subject: [PATCH] [AArch64] Update Neoverse V2 FSQRT execution units in
schedule model.
---
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 10 ++++------
.../AArch64/Neoverse/V2-sve-instructions.s | 14 +++++++-------
2 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index f10b94523d2e03..4d7f44e7b9b9ab 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1076,14 +1076,12 @@ def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let
def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
-def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
-def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
+def V2Write_16cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [14]; }
def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
-def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; }
// Miscellaneous
// -----------------------------------------------------------------------------
@@ -2567,13 +2565,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
+def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
+def : InstRW<[V2Write_16cyc_1V02_14rc], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric exponentiation
def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
index 4d6ce706b05274..acd35568249499 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
@@ -4278,9 +4278,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 0.25 fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 3 0.25 fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 3 0.25 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 16 7.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 13 6.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 10 4.50 fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 2 0.25 fsub z0.d, z1.d, z31.d
@@ -6861,7 +6861,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1554.25 1281.75 776.75 748.25
+# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1536.75 1281.75 794.25 748.25
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -7718,9 +7718,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - 7.00 - 7.00 - fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - 6.00 - 6.00 - fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - 4.50 - 4.50 - fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, z1.d, z31.d
More information about the llvm-commits
mailing list