[llvm] [RISCV] Separate addend from FMA operands to support cascade FMA. NFC. (PR #70241)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 25 11:44:31 PDT 2023
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/70241
This PR separate addend from FMA operands to support cascade FMA. In some microarchitectures (e.g., ARM cortex-a72 and XiangShan-NanHu), FP multiply-accumulate pipelines support late-forwarding of accumulate operands, which reduces the latency of a sequence of multiply-accumulate instructions.
See also #70232.
>From 5f0da9b6b68b617ee623df19538e2b0595c5fd00 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 26 Oct 2023 02:37:48 +0800
Subject: [PATCH] [RISCV] Separate addend from FMA operands to support cascade
FMA. NFC.
---
llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 2 +-
llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 2 +-
llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 2 +-
llvm/lib/Target/RISCV/RISCVSchedRocket.td | 2 ++
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3 +++
llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td | 2 ++
llvm/lib/Target/RISCV/RISCVSchedule.td | 3 +++
7 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 59312f02aeceb77..34becfafe77473d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -78,7 +78,7 @@ def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
} // Predicates = [HasStdExtD]
foreach Ext = DExts in {
- let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+ let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in {
defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", Ext>;
defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", Ext>;
defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 8726245f1602ebf..3a5794bb2d19474 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -302,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
foreach Ext = FExts in {
- let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+ let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", Ext>;
defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index b65e9f5af033194..1dc391d3f084fec 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -85,7 +85,7 @@ def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
} // Predicates = [HasHalfFPLoadStoreMove]
foreach Ext = ZfhExts in {
- let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+ let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in {
defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>;
defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", Ext>;
defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", Ext>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 8fbc9afe267c562..bb9dfe5d0124098 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -206,7 +206,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 96ebe8e3e67686a..d2447cf23e266c6 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -933,10 +933,13 @@ def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 960258c8bc7dfe8..06ad2075b073614 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -164,7 +164,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index af318ea5bf6851a..f6c1b096ad90c46 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -150,8 +150,11 @@ def ReadFMul16 : SchedRead; // 16-bit floating point multiply
def ReadFMul32 : SchedRead; // 32-bit floating point multiply
def ReadFMul64 : SchedRead; // 64-bit floating point multiply
def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add
+def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend)
def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add
+def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend)
def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add
+def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend)
def ReadFDiv16 : SchedRead; // 16-bit floating point divide
def ReadFDiv32 : SchedRead; // 32-bit floating point divide
def ReadFDiv64 : SchedRead; // 64-bit floating point divide
More information about the llvm-commits
mailing list