[clang] [llvm] [RISCV] Add scheduling model for Syntacore SCR3 (PR #95427)

Anton Sidorenko via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 14 06:32:37 PDT 2024


https://github.com/asi-sc updated https://github.com/llvm/llvm-project/pull/95427

>From 1449d6ba48779051f19dcf9160aaa40599e2750e Mon Sep 17 00:00:00 2001
From: Anton Sidorenko <anton.sidorenko at syntacore.com>
Date: Fri, 31 May 2024 16:10:28 +0300
Subject: [PATCH 1/2] [RISCV] Add scheduling model for Syntacore SCR3

Syntacore SCR3 is a microcontroller-class processor core.
Overview: https://syntacore.com/products/scr3
This PR introduces two CPUs:
  * 'syntacore-scr3-rv32' which is rv32imc
  * 'syntacore-scr3-rv64' which is rv64imac

Co-authored-by: Dmitrii Petrov <dmitrii.petrov at syntacore.com>
---
 clang/test/Misc/target-invalid-cpu-note.c     |   8 +-
 llvm/lib/Target/RISCV/RISCV.td                |   1 +
 llvm/lib/Target/RISCV/RISCVProcessors.td      |  21 ++
 .../Target/RISCV/RISCVSchedSyntacoreSCR3.td   | 266 ++++++++++++++++++
 .../RISCV/SyntacoreSCR/SCR3-dmaadmaa.s        |  91 ++++++
 .../llvm-mca/RISCV/SyntacoreSCR/SCR3-llll.s   |  57 ++++
 6 files changed, 440 insertions(+), 4 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR3.td
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
 create mode 100644 llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-llll.s

diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 5362c6f882c25..90dbd3ef37342 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -81,16 +81,16 @@
 
 // RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32
 // RISCV32: error: unknown target CPU 'not-a-cpu'
-// RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max{{$}}
+// RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, syntacore-scr3-rv32{{$}}
 
 // RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64
 // RISCV64: error: unknown target CPU 'not-a-cpu'
-// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-p450, sifive-p670, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1, xiangshan-nanhu{{$}}
+// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-p450, sifive-p670, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, syntacore-scr3-rv64, veyron-v1, xiangshan-nanhu{{$}}
 
 // RUN: not %clang_cc1 -triple riscv32 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV32
 // TUNE-RISCV32: error: unknown target CPU 'not-a-cpu'
-// TUNE-RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, generic, rocket, sifive-7-series{{$}}
+// TUNE-RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, syntacore-scr3-rv32, generic, rocket, sifive-7-series{{$}}
 
 // RUN: not %clang_cc1 -triple riscv64 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV64
 // TUNE-RISCV64: error: unknown target CPU 'not-a-cpu'
-// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-p450, sifive-p670, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, veyron-v1, xiangshan-nanhu, generic, rocket, sifive-7-series{{$}}
+// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-p450, sifive-p670, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, syntacore-scr3-rv64, veyron-v1, xiangshan-nanhu, generic, rocket, sifive-7-series{{$}}
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 09f496574d64a..d96fafbe60807 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -51,6 +51,7 @@ include "RISCVSchedSiFive7.td"
 include "RISCVSchedSiFiveP400.td"
 include "RISCVSchedSiFiveP600.td"
 include "RISCVSchedSyntacoreSCR1.td"
+include "RISCVSchedSyntacoreSCR3.td"
 include "RISCVSchedXiangShanNanHu.td"
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 6ebf9f1eb0452..822af1c6dcbbc 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -326,6 +326,27 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
                                               FeatureStdExtC],
                                              [TuneNoDefaultUnroll]>;
 
+def SYNTACORE_SCR3_RV32 : RISCVProcessorModel<"syntacore-scr3-rv32",
+                                              SyntacoreSCR3RV32Model,
+                                              [Feature32Bit,
+                                               FeatureStdExtI,
+                                               FeatureStdExtZicsr,
+                                               FeatureStdExtZifencei,
+                                               FeatureStdExtM,
+                                               FeatureStdExtC],
+                                              [TuneNoDefaultUnroll, FeaturePostRAScheduler]>;
+
+def SYNTACORE_SCR3_RV64 : RISCVProcessorModel<"syntacore-scr3-rv64",
+                                              SyntacoreSCR3RV64Model,
+                                              [Feature64Bit,
+                                               FeatureStdExtI,
+                                               FeatureStdExtZicsr,
+                                               FeatureStdExtZifencei,
+                                               FeatureStdExtM,
+                                               FeatureStdExtA,
+                                               FeatureStdExtC],
+                                              [TuneNoDefaultUnroll, FeaturePostRAScheduler]>;
+
 def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                             NoSchedModel,
                                             [Feature64Bit,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR3.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR3.td
new file mode 100644
index 0000000000000..59509f500e65e
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR3.td
@@ -0,0 +1,266 @@
+//==- RISCVSchedSyntacoreSCR3.td - Syntacore SCR3 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+// This model covers SYNTACORE_SCR3_RV32IMC and SYNTACORE_SCR3_RV64IMAC
+// configurations (syntacore-scr3-rv32/64).
+// Overview: https://syntacore.com/products/scr3
+
+// SCR3 is a single-issue in-order processor
+class SyntacoreSCR3Model : SchedMachineModel {
+  let MicroOpBufferSize = 0;
+  let IssueWidth = 1;
+  let LoadLatency = 2;
+  let MispredictPenalty = 3;
+  let CompleteModel = 0;
+  let UnsupportedFeatures = [HasStdExtD, HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+                             HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+                             HasVInstructions];
+}
+
+// Branching
+multiclass SCR3_Branching<ProcResourceKind BRU> {
+  def : WriteRes<WriteJmp, [BRU]>;
+  def : WriteRes<WriteJal, [BRU]>;
+  def : WriteRes<WriteJalr, [BRU]>;
+}
+
+// Single-cycle integer arithmetic and logic
+multiclass SCR3_IntALU<ProcResourceKind ALU> {
+  def : WriteRes<WriteIALU, [ALU]>;
+  def : WriteRes<WriteIALU32, [ALU]>;
+  def : WriteRes<WriteShiftImm, [ALU]>;
+  def : WriteRes<WriteShiftImm32, [ALU]>;
+  def : WriteRes<WriteShiftReg, [ALU]>;
+  def : WriteRes<WriteShiftReg32, [ALU]>;
+}
+
+// Integer multiplication
+multiclass SCR3_IntMul<ProcResourceKind MUL> {
+  let Latency = 2 in {
+    def : WriteRes<WriteIMul, [MUL]>;
+    def : WriteRes<WriteIMul32, [MUL]>;
+  }
+}
+
+// Integer division
+multiclass SCR3_IntDiv<ProcResourceKind DIV, int DivLatency> {
+  let Latency = DivLatency, ReleaseAtCycles = [DivLatency] in {
+    def : WriteRes<WriteIDiv, [DIV]>;
+    def : WriteRes<WriteIDiv32, [DIV]>;
+    def : WriteRes<WriteIRem, [DIV]>;
+    def : WriteRes<WriteIRem32, [DIV]>;
+  }
+}
+
+// Load/store instructions on SCR3 have latency 2
+multiclass SCR3_Memory<ProcResourceKind LSU> {
+  let Latency = 2 in {
+    def : WriteRes<WriteSTB, [LSU]>;
+    def : WriteRes<WriteSTH, [LSU]>;
+    def : WriteRes<WriteSTW, [LSU]>;
+    def : WriteRes<WriteSTD, [LSU]>;
+    def : WriteRes<WriteLDB, [LSU]>;
+    def : WriteRes<WriteLDH, [LSU]>;
+    def : WriteRes<WriteLDW, [LSU]>;
+    def : WriteRes<WriteLDD, [LSU]>;
+  }
+}
+
+// Atomic memory
+multiclass SCR3_AtomicMemory<ProcResourceKind LSU> {
+  let Latency = 20 in {
+    def : WriteRes<WriteAtomicLDW, [LSU]>;
+    def : WriteRes<WriteAtomicLDD, [LSU]>;
+    def : WriteRes<WriteAtomicW, [LSU]>;
+    def : WriteRes<WriteAtomicD, [LSU]>;
+    def : WriteRes<WriteAtomicSTW, [LSU]>;
+    def : WriteRes<WriteAtomicSTD, [LSU]>;
+  }
+}
+
+// Others
+multiclass SCR3_Other {
+  def : WriteRes<WriteCSR, []>;
+  def : WriteRes<WriteNop, []>;
+
+  def : InstRW<[WriteIALU], (instrs COPY)>;
+}
+
+
+multiclass SCR3_Unsupported {
+  defm : UnsupportedSchedSFB;
+  defm : UnsupportedSchedV;
+  defm : UnsupportedSchedXsfvcp;
+  defm : UnsupportedSchedZabha;
+  defm : UnsupportedSchedZba;
+  defm : UnsupportedSchedZbb;
+  defm : UnsupportedSchedZbc;
+  defm : UnsupportedSchedZbs;
+  defm : UnsupportedSchedZbkb;
+  defm : UnsupportedSchedZbkx;
+  defm : UnsupportedSchedZfa;
+  defm : UnsupportedSchedZfh;
+  defm : UnsupportedSchedZvk;
+
+  let Unsupported = true in {
+    // FP load/store
+    def : WriteRes<WriteFST32, []>;
+    def : WriteRes<WriteFST64, []>;
+    def : WriteRes<WriteFLD32, []>;
+    def : WriteRes<WriteFLD64, []>;
+
+    // FP instructions
+    def : WriteRes<WriteFAdd32, []>;
+    def : WriteRes<WriteFSGNJ32, []>;
+    def : WriteRes<WriteFMinMax32, []>;
+    def : WriteRes<WriteFAdd64, []>;
+    def : WriteRes<WriteFSGNJ64, []>;
+    def : WriteRes<WriteFMinMax64, []>;
+    def : WriteRes<WriteFCvtI32ToF32, []>;
+    def : WriteRes<WriteFCvtI32ToF64, []>;
+    def : WriteRes<WriteFCvtI64ToF32, []>;
+    def : WriteRes<WriteFCvtI64ToF64, []>;
+    def : WriteRes<WriteFCvtF32ToI32, []>;
+    def : WriteRes<WriteFCvtF32ToI64, []>;
+    def : WriteRes<WriteFCvtF64ToI32, []>;
+    def : WriteRes<WriteFCvtF64ToI64, []>;
+    def : WriteRes<WriteFCvtF32ToF64, []>;
+    def : WriteRes<WriteFCvtF64ToF32, []>;
+    def : WriteRes<WriteFClass32, []>;
+    def : WriteRes<WriteFClass64, []>;
+    def : WriteRes<WriteFCmp32, []>;
+    def : WriteRes<WriteFCmp64, []>;
+    def : WriteRes<WriteFMovF32ToI32, []>;
+    def : WriteRes<WriteFMovI32ToF32, []>;
+    def : WriteRes<WriteFMovF64ToI64, []>;
+    def : WriteRes<WriteFMovI64ToF64, []>;
+    def : WriteRes<WriteFMul32, []>;
+    def : WriteRes<WriteFMA32, []>;
+    def : WriteRes<WriteFMul64, []>;
+    def : WriteRes<WriteFMA64, []>;
+    def : WriteRes<WriteFDiv32, []>;
+    def : WriteRes<WriteFDiv64, []>;
+    def : WriteRes<WriteFSqrt32, []>;
+    def : WriteRes<WriteFSqrt64, []>;
+  }
+}
+
+// Bypasses (none)
+multiclass SCR3_NoReadAdvances {
+  def : ReadAdvance<ReadJmp, 0>;
+  def : ReadAdvance<ReadJalr, 0>;
+  def : ReadAdvance<ReadCSR, 0>;
+  def : ReadAdvance<ReadStoreData, 0>;
+  def : ReadAdvance<ReadMemBase, 0>;
+  def : ReadAdvance<ReadIALU, 0>;
+  def : ReadAdvance<ReadIALU32, 0>;
+  def : ReadAdvance<ReadShiftImm, 0>;
+  def : ReadAdvance<ReadShiftImm32, 0>;
+  def : ReadAdvance<ReadShiftReg, 0>;
+  def : ReadAdvance<ReadShiftReg32, 0>;
+  def : ReadAdvance<ReadIDiv, 0>;
+  def : ReadAdvance<ReadIDiv32, 0>;
+  def : ReadAdvance<ReadIRem, 0>;
+  def : ReadAdvance<ReadIRem32, 0>;
+  def : ReadAdvance<ReadIMul, 0>;
+  def : ReadAdvance<ReadIMul32, 0>;
+  def : ReadAdvance<ReadAtomicWA, 0>;
+  def : ReadAdvance<ReadAtomicWD, 0>;
+  def : ReadAdvance<ReadAtomicDA, 0>;
+  def : ReadAdvance<ReadAtomicDD, 0>;
+  def : ReadAdvance<ReadAtomicLDW, 0>;
+  def : ReadAdvance<ReadAtomicLDD, 0>;
+  def : ReadAdvance<ReadAtomicSTW, 0>;
+  def : ReadAdvance<ReadAtomicSTD, 0>;
+  def : ReadAdvance<ReadFStoreData, 0>;
+  def : ReadAdvance<ReadFMemBase, 0>;
+  def : ReadAdvance<ReadFAdd32, 0>;
+  def : ReadAdvance<ReadFAdd64, 0>;
+  def : ReadAdvance<ReadFMul32, 0>;
+  def : ReadAdvance<ReadFMul64, 0>;
+  def : ReadAdvance<ReadFMA32, 0>;
+  def : ReadAdvance<ReadFMA32Addend, 0>;
+  def : ReadAdvance<ReadFMA64, 0>;
+  def : ReadAdvance<ReadFMA64Addend, 0>;
+  def : ReadAdvance<ReadFDiv32, 0>;
+  def : ReadAdvance<ReadFDiv64, 0>;
+  def : ReadAdvance<ReadFSqrt32, 0>;
+  def : ReadAdvance<ReadFSqrt64, 0>;
+  def : ReadAdvance<ReadFCmp32, 0>;
+  def : ReadAdvance<ReadFCmp64, 0>;
+  def : ReadAdvance<ReadFSGNJ32, 0>;
+  def : ReadAdvance<ReadFSGNJ64, 0>;
+  def : ReadAdvance<ReadFMinMax32, 0>;
+  def : ReadAdvance<ReadFMinMax64, 0>;
+  def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+  def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+  def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+  def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+  def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+  def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+  def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+  def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+  def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+  def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+  def : ReadAdvance<ReadFMovF32ToI32, 0>;
+  def : ReadAdvance<ReadFMovI32ToF32, 0>;
+  def : ReadAdvance<ReadFMovF64ToI64, 0>;
+  def : ReadAdvance<ReadFMovI64ToF64, 0>;
+  def : ReadAdvance<ReadFClass32, 0>;
+  def : ReadAdvance<ReadFClass64, 0>;
+  def : ReadAdvance<ReadSFBALU, 0>;
+}
+
+def SyntacoreSCR3RV32Model : SyntacoreSCR3Model;
+
+let SchedModel = SyntacoreSCR3RV32Model in {
+  let BufferSize = 0 in {
+    def SCR3RV32_ALU : ProcResource<1>;
+    def SCR3RV32_MUL : ProcResource<1>;
+    def SCR3RV32_DIV : ProcResource<1>;
+    def SCR3RV32_LSU : ProcResource<1>;
+    def SCR3RV32_CFU : ProcResource<1>;
+  }
+
+  defm : SCR3_Branching<SCR3RV32_CFU>;
+  defm : SCR3_IntALU<SCR3RV32_ALU>;
+  defm : SCR3_IntMul<SCR3RV32_MUL>;
+  defm : SCR3_IntDiv<SCR3RV32_DIV, /* div latency = */ 8>;
+  defm : SCR3_Memory<SCR3RV32_LSU>;
+  defm : SCR3_AtomicMemory<SCR3RV32_LSU>;
+  defm : SCR3_Other;
+
+  defm : SCR3_Unsupported;
+  defm : SCR3_NoReadAdvances;
+}
+
+def SyntacoreSCR3RV64Model : SyntacoreSCR3Model;
+
+let SchedModel = SyntacoreSCR3RV64Model in {
+  let BufferSize = 0 in {
+    def SCR3RV64_ALU : ProcResource<1>;
+    def SCR3RV64_MUL : ProcResource<1>;
+    def SCR3RV64_DIV : ProcResource<1>;
+    def SCR3RV64_LSU : ProcResource<1>;
+    def SCR3RV64_CFU : ProcResource<1>;
+  }
+
+  defm : SCR3_Branching<SCR3RV64_CFU>;
+  defm : SCR3_IntALU<SCR3RV64_ALU>;
+  defm : SCR3_IntMul<SCR3RV64_MUL>;
+  defm : SCR3_IntDiv<SCR3RV64_DIV, /* div latency = */ 11>;
+  defm : SCR3_Memory<SCR3RV64_LSU>;
+  defm : SCR3_AtomicMemory<SCR3RV64_LSU>;
+  defm : SCR3_Other;
+
+  defm : SCR3_Unsupported;
+  defm : SCR3_NoReadAdvances;
+}
diff --git a/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
new file mode 100644
index 0000000000000..e5edf479fd1d6
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=syntacore-scr3-rv64 --iterations=2 < %s | FileCheck %s --check-prefixes=CHECK,RV64
+# RUN: llvm-mca -mtriple=riscv32-unknown-unknown -mcpu=syntacore-scr3-rv32 --iterations=2 < %s | FileCheck %s --check-prefixes=CHECK,RV32
+
+div a0, a0, a0
+mul t0, a0, t0
+add t1, a0, t0
+add t2, t2, t2
+div a1, a1, a1
+mul s0, a1, s0
+add s1, s0, s1
+add s2, s2, s2
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      16
+
+# RV32-NEXT:  Total Cycles:      45
+# RV64-NEXT:  Total Cycles:      61
+
+# CHECK-NEXT: Total uOps:        16
+
+# CHECK:      Dispatch Width:    1
+
+# RV32-NEXT:  uOps Per Cycle:    0.36
+# RV32-NEXT:  IPC:               0.36
+# RV32-NEXT:  Block RThroughput: 14.0
+
+# RV64-NEXT:  uOps Per Cycle:    0.26
+# RV64-NEXT:  IPC:               0.26
+# RV64-NEXT:  Block RThroughput: 22.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+
+# RV32-NEXT:   1      7     7.00                        div	a0, a0, a0
+# RV64-NEXT:   1      11    11.00                       div	a0, a0, a0
+
+# CHECK-NEXT:  1      2     1.00                        mul	t0, a0, t0
+# CHECK-NEXT:  1      1     1.00                        add	t1, a0, t0
+# CHECK-NEXT:  1      1     1.00                        add	t2, t2, t2
+
+# RV32-NEXT:   1      7     7.00                        div	a1, a1, a1
+# RV64-NEXT:   1      11    11.00                       div	a1, a1, a1
+
+# CHECK-NEXT:  1      2     1.00                        mul	s0, a1, s0
+# CHECK-NEXT:  1      1     1.00                        add	s1, s1, s0
+# CHECK-NEXT:  1      1     1.00                        add	s2, s2, s2
+
+# CHECK:      Resources:
+
+# RV32-NEXT:  [0]   - SCR3RV32_ALU
+# RV32-NEXT:  [1]   - SCR3RV32_CFU
+# RV32-NEXT:  [2]   - SCR3RV32_DIV
+# RV32-NEXT:  [3]   - SCR3RV32_LSU
+# RV32-NEXT:  [4]   - SCR3RV32_MUL
+
+# RV64-NEXT:  [0]   - SCR3RV64_ALU
+# RV64-NEXT:  [1]   - SCR3RV64_CFU
+# RV64-NEXT:  [2]   - SCR3RV64_DIV
+# RV64-NEXT:  [3]   - SCR3RV64_LSU
+# RV64-NEXT:  [4]   - SCR3RV64_MUL
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]
+
+# RV32-NEXT:  4.00    -     14.00   -     2.00
+# RV64-NEXT:  4.00    -     22.00   -     2.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    Instructions:
+
+# RV32-NEXT:   -      -     7.00    -      -     div	a0, a0, a0
+# RV64-NEXT:   -      -     11.00   -      -     div	a0, a0, a0
+
+# CHECK-NEXT:  -      -      -      -     1.00   mul	t0, a0, t0
+# CHECK-NEXT: 1.00    -      -      -      -     add	t1, a0, t0
+# CHECK-NEXT: 1.00    -      -      -      -     add	t2, t2, t2
+
+# RV32-NEXT:   -      -     7.00    -      -     div	a1, a1, a1
+# RV64-NEXT:   -      -     11.00   -      -     div	a1, a1, a1
+
+# CHECK-NEXT:  -      -      -      -     1.00   mul	s0, a1, s0
+# CHECK-NEXT: 1.00    -      -      -      -     add	s1, s1, s0
+# CHECK-NEXT: 1.00    -      -      -      -     add	s2, s2, s2
diff --git a/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-llll.s b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-llll.s
new file mode 100644
index 0000000000000..e312c36907b35
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-llll.s
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=syntacore-scr3-rv64 --iterations=2 < %s | FileCheck %s --check-prefixes=CHECK,RV64
+# RUN: llvm-mca -mtriple=riscv32-unknown-unknown -mcpu=syntacore-scr3-rv32 --iterations=2 < %s | FileCheck %s --check-prefixes=CHECK,RV32
+
+lw a0, 0(s0)
+lw a1, 0(s0)
+lw a2, 0(s0)
+lw a3, 0(s0)
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      8
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total uOps:        8
+
+# CHECK:      Dispatch Width:    1
+# CHECK-NEXT: uOps Per Cycle:    0.80
+# CHECK-NEXT: IPC:               0.80
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      2     1.00    *                   lw	a0, 0(s0)
+# CHECK-NEXT:  1      2     1.00    *                   lw	a1, 0(s0)
+# CHECK-NEXT:  1      2     1.00    *                   lw	a2, 0(s0)
+# CHECK-NEXT:  1      2     1.00    *                   lw	a3, 0(s0)
+
+# CHECK:      Resources:
+
+# RV32-NEXT:  [0]   - SCR3RV32_ALU
+# RV32-NEXT:  [1]   - SCR3RV32_CFU
+# RV32-NEXT:  [2]   - SCR3RV32_DIV
+# RV32-NEXT:  [3]   - SCR3RV32_LSU
+# RV32-NEXT:  [4]   - SCR3RV32_MUL
+
+# RV64-NEXT:  [0]   - SCR3RV64_ALU
+# RV64-NEXT:  [1]   - SCR3RV64_CFU
+# RV64-NEXT:  [2]   - SCR3RV64_DIV
+# RV64-NEXT:  [3]   - SCR3RV64_LSU
+# RV64-NEXT:  [4]   - SCR3RV64_MUL
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]
+# CHECK-NEXT:  -      -      -     4.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -     lw	a0, 0(s0)
+# CHECK-NEXT:  -      -      -     1.00    -     lw	a1, 0(s0)
+# CHECK-NEXT:  -      -      -     1.00    -     lw	a2, 0(s0)
+# CHECK-NEXT:  -      -      -     1.00    -     lw	a3, 0(s0)

>From f485010ab1e8fb1cdc88131c7e88e8f1846e6af3 Mon Sep 17 00:00:00 2001
From: Anton Sidorenko <anton.sidorenko at syntacore.com>
Date: Fri, 14 Jun 2024 16:32:06 +0300
Subject: [PATCH 2/2] fix test

---
 .../RISCV/SyntacoreSCR/SCR3-dmaadmaa.s         | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
index e5edf479fd1d6..ea7943f180fda 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SyntacoreSCR/SCR3-dmaadmaa.s
@@ -14,16 +14,16 @@ add s2, s2, s2
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      16
 
-# RV32-NEXT:  Total Cycles:      45
+# RV32-NEXT:  Total Cycles:      49
 # RV64-NEXT:  Total Cycles:      61
 
 # CHECK-NEXT: Total uOps:        16
 
 # CHECK:      Dispatch Width:    1
 
-# RV32-NEXT:  uOps Per Cycle:    0.36
-# RV32-NEXT:  IPC:               0.36
-# RV32-NEXT:  Block RThroughput: 14.0
+# RV32-NEXT:  uOps Per Cycle:    0.33
+# RV32-NEXT:  IPC:               0.33
+# RV32-NEXT:  Block RThroughput: 16.0
 
 # RV64-NEXT:  uOps Per Cycle:    0.26
 # RV64-NEXT:  IPC:               0.26
@@ -39,14 +39,14 @@ add s2, s2, s2
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 
-# RV32-NEXT:   1      7     7.00                        div	a0, a0, a0
+# RV32-NEXT:   1      8     8.00                        div	a0, a0, a0
 # RV64-NEXT:   1      11    11.00                       div	a0, a0, a0
 
 # CHECK-NEXT:  1      2     1.00                        mul	t0, a0, t0
 # CHECK-NEXT:  1      1     1.00                        add	t1, a0, t0
 # CHECK-NEXT:  1      1     1.00                        add	t2, t2, t2
 
-# RV32-NEXT:   1      7     7.00                        div	a1, a1, a1
+# RV32-NEXT:   1      8     8.00                        div	a1, a1, a1
 # RV64-NEXT:   1      11    11.00                       div	a1, a1, a1
 
 # CHECK-NEXT:  1      2     1.00                        mul	s0, a1, s0
@@ -70,20 +70,20 @@ add s2, s2, s2
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]
 
-# RV32-NEXT:  4.00    -     14.00   -     2.00
+# RV32-NEXT:  4.00    -     16.00   -     2.00
 # RV64-NEXT:  4.00    -     22.00   -     2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    Instructions:
 
-# RV32-NEXT:   -      -     7.00    -      -     div	a0, a0, a0
+# RV32-NEXT:   -      -     8.00    -      -     div	a0, a0, a0
 # RV64-NEXT:   -      -     11.00   -      -     div	a0, a0, a0
 
 # CHECK-NEXT:  -      -      -      -     1.00   mul	t0, a0, t0
 # CHECK-NEXT: 1.00    -      -      -      -     add	t1, a0, t0
 # CHECK-NEXT: 1.00    -      -      -      -     add	t2, t2, t2
 
-# RV32-NEXT:   -      -     7.00    -      -     div	a1, a1, a1
+# RV32-NEXT:   -      -     8.00    -      -     div	a1, a1, a1
 # RV64-NEXT:   -      -     11.00   -      -     div	a1, a1, a1
 
 # CHECK-NEXT:  -      -      -      -     1.00   mul	s0, a1, s0



More information about the cfe-commits mailing list