[llvm] [RISCV] Refine division latencies in SiFive P600's scheduling model (PR #115038)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 09:57:19 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Min-Yih Hsu (mshockwave)
<details>
<summary>Changes</summary>
For both integer and floating-point divisions.
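
For reference, a minimal sketch (in Python, not part of the patch) of how the refined latencies are derived from the per-SEW micro-op latencies in the diff below. The `lmul_cycles` helper is hypothetical and assumes `SiFiveP600GetLMulCycles` yields 1 for fractional LMULs and the LMUL value for m1/m2/m4/m8:

```python
# Per-LMUL division latencies taken from the !cond expressions in the patch.
INT_DIV_CYCLES = {8: 51, 16: 45, 32: 42, 64: 72}  # vdiv/vdivu/vrem/vremu
FP_DIV_CYCLES = {16: 29, 32: 25, 64: 37}          # vfdiv/vfrdiv/vfsqrt

def lmul_cycles(lmul: str) -> int:
    # Assumption: fractional LMULs cost the same as LMUL=1.
    return 1 if lmul.startswith("mf") else int(lmul[1:])

def vdiv_latency(sew: int, lmul: str) -> int:
    return INT_DIV_CYCLES[sew] * lmul_cycles(lmul)

def vfdiv_latency(sew: int, lmul: str) -> int:
    return FP_DIV_CYCLES[sew] * lmul_cycles(lmul)

# e.g. vdiv.vv at SEW=8, LMUL=2 -> 51 * 2 = 102 cycles, and SEW=64,
# LMUL=8 -> 72 * 8 = 576 cycles, matching the llvm-mca output in the
# new test below.
assert vdiv_latency(8, "m2") == 102
assert vdiv_latency(64, "m8") == 576
```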
---
Patch is 88.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115038.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td (+11-4)
- (added) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s (+1012)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 51aa003139fbad..a9e3c6766d1214 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -443,12 +443,16 @@ foreach mx = SchedMxListW in {
}
}
-// Worst case needs 64 cycles if SEW is equal to 64.
+// Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = 64, ReleaseAtCycles = [LMulLat, !mul(63, LMulLat)] in {
+ defvar DivMicroOpLat =
+ !cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
+ /* SEW=64 */ true: 72);
+ defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
+ let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
}
@@ -583,12 +587,15 @@ foreach mx = SchedMxListFW in {
}
}
-// Worst case needs 76 cycles if SEW is equal to 64.
+// Worst case needs around 29/25/37 * LMUL cycles for f16/32/64.
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = 76, ReleaseAtCycles = [LMulLat, !mul(76, LMulLat)] in {
+ defvar DivMicroOpLat =
+ !cond(!eq(sew, 16): 29, !eq(sew, 32): 25, /* SEW=64 */ true: 37);
+ defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
+ let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s
new file mode 100644
index 00000000000000..0b7cc95dcb8d62
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s
@@ -0,0 +1,1012 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, mf4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, mf2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf8, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m1, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m2, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m4, tu, mu
+vdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m8, tu, mu
+vdiv.vv v8, v16, v24
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, mf4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, mf2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, m2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, m4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e8, m8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, mf8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, mf4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, mf2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, m2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, m4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e16, m8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, mf8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, mf4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, mf2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, m2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, m4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e32, m8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, mf8, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, mf4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, mf2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, m1, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, m2, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, m4, tu, mu
+vdiv.vx v8, v16, a0
+vsetvli zero, zero, e64, m8, tu, mu
+vdiv.vx v8, v16, a0
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, mf4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, mf2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e8, m8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, mf2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e16, m8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, mf2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e32, m8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf8, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, mf2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m1, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m2, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m4, tu, mu
+vfdiv.vv v8, v16, v24
+vsetvli zero, zero, e64, m8, tu, mu
+vfdiv.vv v8, v16, v24
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, mf4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, mf2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, m2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, m4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e8, m8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, mf8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, mf4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, mf2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, m2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, m4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e16, m8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, mf8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, mf4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, mf2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, m2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, m4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e32, m8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, mf8, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, mf4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, mf2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, m1, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, m2, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, m4, tu, mu
+vfdiv.vf v8, v16, fa0
+vsetvli zero, zero, e64, m8, tu, mu
+vfdiv.vf v8, v16, fa0
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, mf4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, mf2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, m2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, m4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e8, m8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, mf8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, mf4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, mf2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, m2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, m4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e16, m8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, mf8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, mf4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, mf2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, m2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, m4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e32, m8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, mf8, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, mf4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, mf2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, m1, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, m2, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, m4, tu, mu
+vfsqrt.v v8, v16
+vsetvli zero, zero, e64, m8, tu, mu
+vfsqrt.v v8, v16
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 320
+# CHECK-NEXT: Total Cycles: 14613
+# CHECK-NEXT: Total uOps: 320
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.02
+# CHECK-NEXT: IPC: 0.02
+# CHECK-NEXT: Block RThroughput: 14361.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 102 102.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 204 204.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 90 90.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 180 180.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 360 360.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 42 42.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 42 42.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 42 42.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 84 84.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 168 168.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 336 336.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 72 72.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 72 72.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu
+# CHECK-NEXT: 1 144 144.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu
+# CHECK-NEXT: 1 288 288.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu
+# CHECK-NEXT: 1 576 576.00 vdiv.vv v8, v16, v24
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 51 51.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 102 102.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 204 204.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu
+# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0
+# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 90 90.00 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/115038
More information about the llvm-commits mailing list