[llvm] [RISCV] Fix the worst case for VSHA2MS in SiFive P400/P600 scheduling models (PR #116893)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 16:06:05 PST 2024
https://github.com/mshockwave created https://github.com/llvm/llvm-project/pull/116893
For each RVV instruction we should have a single WriteRes assignment to the worst case scheduling class. This assignment is usually equal to that of the largest LMUL + smallest SEW. My #114317 accidentally made two of these assignments on `WriteVSHA2MSV_WorstCase`. This won't affect our MachineScheduler nor most of our llvm-mca use cases (assuming you populate the correct LMUL and SEW), yet it's not ideal either.
This patch fixes this issue by assigning the correct numbers and resource mapping to `WriteVSHA2MSV_WorstCase`, which is equal to that of largest LMUL + _largest_ SEW (Zvknh's scheduling properties are special). I also added a MCA test to make sure we always pick up the correct worst case numbers for P600's scheduling model.
Original issue was reported by @reidtatge
>From 7b087f0461b1de712d258d79fb310836a86529b9 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 19 Nov 2024 15:43:33 -0800
Subject: [PATCH] [RISCV] Fix the worst case for VSHA2MS in SiFive P400/P600
scheduling models
TBA...
---
llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 12 ++++++++++--
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 11 +++++++++--
.../tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s | 19 ++++++++++++-------
3 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 1af89903e0068c..a86c255f0820ed 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -883,8 +883,16 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
- foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
- defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
+ defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
+ // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
+ // order.
+ defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
+ foreach sew = ZvknhSEWs in {
+ // The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
+ defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew,
+ IsWorstCaseVSHA2MSV>;
+ }
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index c2d93d4c0a7f0a..0c695c9ef30710 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -1135,9 +1135,16 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
- foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in {
+ defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
+ // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
+ // order.
+ defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
+ foreach sew = ZvknhSEWs in {
+ // The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
+ defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew,
+ IsWorstCaseVSHA2MSV>;
}
}
// Zvkned
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
index 20ac87a724af16..adf780279c8954 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
@@ -1,6 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s
+# Worst case for vsha2ms should be that of LMUL=8 and SEW=64.
+vsha2ms.vv v4, v8, v12
+
# SEW is only e32 or e64
vsetvli zero, zero, e32, m1, tu, mu
@@ -44,14 +47,14 @@ vsha2ch.vv v8, v16, v24
vsha2cl.vv v8, v16, v24
# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 32
-# CHECK-NEXT: Total Cycles: 108
-# CHECK-NEXT: Total uOps: 32
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 119
+# CHECK-NEXT: Total uOps: 33
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.30
-# CHECK-NEXT: IPC: 0.30
-# CHECK-NEXT: Block RThroughput: 97.0
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 109.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -62,6 +65,7 @@ vsha2cl.vv v8, v16, v24
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12
@@ -115,10 +119,11 @@ vsha2cl.vv v8, v16, v24
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - -
+# CHECK-NEXT: - - - - 8.00 - - - - - - 109.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12
More information about the llvm-commits
mailing list