[llvm] [RISCV] Implement forward inserting save/restore FRM instructions. (PR #77744)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 24 18:07:34 PST 2024
https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/77744
From 61af743723a156fd0f1c24d8b36d3160b38080f8 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 11 Jan 2024 13:52:54 +0800
Subject: [PATCH 1/7] [RISCV] Implement forward inserting save/restore FRM
instructions.
Previously, RISCVInsertReadWriteCSR inserted an FRM swap for any value other
than 7 and restored the original value right after the vector instruction. This
is inefficient if multiple vector instructions use the same rounding mode or if
the next vector instruction uses a different explicit rounding mode.
This patch implements a local optimization to solve the above problem. We assume
the starting rounding mode of each basic block is dynamic. When iterating through
a basic block and encountering an instruction whose rounding mode differs from the
current one, we write the new rounding mode and, if needed, save the incoming FRM
value first. We also restore FRM when encountering a function call, inline asm, or
any other use of FRM.
A more advanced version would perform cross-basic-block analysis to determine the
starting rounding mode of each basic block.
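
To make the scan concrete, here is a small standalone C++ model of the per-block
forward pass. It is purely illustrative: Instr, RMode and scanBlock are invented
names, and the real pass works on MachineInstr with the SwapFRMImm/WriteFRMImm/
WriteFRM pseudos and a virtual register holding the saved value. The model tracks
the rounding mode currently in FRM, saves the incoming value lazily on the first
instruction that needs a different static mode, and restores it before barriers
(calls, inline asm, FRM readers) and after the last mode-changing instruction.

// Standalone model of the forward save/restore scan (illustrative only).
#include <cstdio>
#include <optional>
#include <vector>

enum RMode { RNE = 0, RTZ = 1, RDN = 2, RUP = 3, RMM = 4, DYN = 7 };

struct Instr {
  bool IsBarrier;            // call, inline asm, or an explicit FRM reader
  std::optional<RMode> RM;   // rounding-mode operand, if the op uses FRM
};

void scanBlock(const std::vector<Instr> &Block) {
  RMode CurrentRM = DYN;     // assume FRM holds the dynamic mode on entry
  bool SavedFRM = false;     // whether the incoming FRM value is stashed

  for (const Instr &I : Block) {
    if (I.IsBarrier) {
      if (SavedFRM)
        std::puts("fsrm  <saved>     # restore FRM before the barrier");
      CurrentRM = DYN;
      SavedFRM = false;
      continue;
    }
    if (!I.RM || *I.RM == CurrentRM)
      continue;              // no FRM use, or FRM already holds this mode
    if (*I.RM == DYN) {      // op wants the dynamic (incoming) mode back
      if (SavedFRM)
        std::puts("fsrm  <saved>     # restore FRM for a DYN op");
      CurrentRM = DYN;
      SavedFRM = false;
      continue;
    }
    if (!SavedFRM) {
      std::printf("fsrmi <saved>, %d  # swap: save old FRM, set new mode\n",
                  int(*I.RM));
      SavedFRM = true;
    } else {
      std::printf("fsrmi %d           # old FRM already saved, just write\n",
                  int(*I.RM));
    }
    CurrentRM = *I.RM;
  }
  // The real pass places this right after the last FRM-changing instruction.
  if (SavedFRM)
    std::puts("fsrm  <saved>     # restore FRM at the end of the block");
}

int main() {
  // Two adds with static mode RNE, a call, then an add that uses DYN:
  // expect one swap, nothing for the second add, one restore before the call.
  scanBlock({{false, RNE}, {false, RNE}, {true, std::nullopt}, {false, DYN}});
}

Because the save is lazy and the restore is deferred, a run of instructions that
share one static rounding mode costs a single fsrmi/fsrm pair instead of one pair
per instruction, which is what the new tests below check.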
---
.../Target/RISCV/RISCVInsertReadWriteCSR.cpp | 116 +++++++++-
llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 217 ++++++++++++++++++
2 files changed, 331 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index b807abcc56819bd..4d574a588adc469 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -23,6 +23,10 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-insert-read-write-csr"
#define RISCV_INSERT_READ_WRITE_CSR_NAME "RISC-V Insert Read/Write CSR Pass"
+static cl::opt<bool> DisableFRMInsertOpt(
+ "riscv-disable-frm-insert-opt", cl::init(false), cl::Hidden,
+ cl::desc("Disable optimized frm insertion."));
+
namespace {
class RISCVInsertReadWriteCSR : public MachineFunctionPass {
@@ -46,6 +50,7 @@ class RISCVInsertReadWriteCSR : public MachineFunctionPass {
private:
bool emitWriteRoundingMode(MachineBasicBlock &MBB);
+ bool emitWriteRoundingModeOpt(MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -55,6 +60,109 @@ char RISCVInsertReadWriteCSR::ID = 0;
INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE,
RISCV_INSERT_READ_WRITE_CSR_NAME, false, false)
+// TODO: Use more accurate rounding mode at the start of MBB.
+bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ MachineInstr *LastFRMChanger = nullptr;
+ std::optional<unsigned> CurrentRM = RISCVFPRndMode::DYN;
+ std::optional<Register> SavedFRM;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == RISCV::SwapFRMImm ||
+ MI.getOpcode() == RISCV::WriteFRMImm ) {
+ CurrentRM = MI.getOperand(0).getImm();
+ SavedFRM = std::nullopt;
+ continue;
+ }
+
+ if (MI.getOpcode() == RISCV::WriteFRM) {
+ CurrentRM = RISCVFPRndMode::DYN;
+ SavedFRM = std::nullopt;
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::FRM)) {
+ // Restore FRM before unknown operations.
+ if (SavedFRM.has_value())
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
+ .addReg(*SavedFRM);
+ CurrentRM = RISCVFPRndMode::DYN;
+ SavedFRM = std::nullopt;
+ continue;
+ }
+
+ assert(!MI.modifiesRegister(RISCV::FRM) &&
+ "Expected that MI could not modify FRM.");
+
+ auto getInstructionRM = [](MachineInstr &MI) -> std::optional<unsigned> {
+ int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (FRMIdx >= 0)
+ return MI.getOperand(FRMIdx).getImm();
+
+ if (!MI.hasRegisterImplicitUseOperand(RISCV::FRM))
+ return std::nullopt;
+
+ // FIXME: Return nullopt if the rounding mode of MI is not DYN, like
+ // FADD_S with RTZ.
+ return RISCVFPRndMode::DYN;
+ };
+
+ std::optional<unsigned> InstrRM = getInstructionRM(MI);
+
+ // Skip if MI does not need FRM.
+ if (!InstrRM.has_value())
+ continue;
+
+ if (InstrRM != RISCVFPRndMode::DYN)
+ LastFRMChanger = &MI;
+
+ if (!MI.readsRegister(RISCV::FRM))
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+
+ // Skip if MI uses same rounding mode as FRM.
+ if (InstrRM == CurrentRM)
+ continue;
+
+ if (InstrRM == RISCVFPRndMode::DYN) {
+ if (!SavedFRM.has_value())
+ continue;
+ // SavedFRM not having a value means current FRM has correct rounding mode.
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
+ .addReg(*SavedFRM);
+ SavedFRM = std::nullopt;
+ CurrentRM = RISCVFPRndMode::DYN;
+ continue;
+ }
+
+ if (CurrentRM == RISCVFPRndMode::DYN) {
+ // Save current FRM value to SavedFRM.
+ MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
+ SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
+ *SavedFRM)
+ .addImm(*InstrRM);
+ } else {
+ // Don't need to save current FRM when CurrentRM != DYN.
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRMImm))
+ .addImm(*InstrRM);
+ }
+ CurrentRM = InstrRM;
+ Changed = true;
+ }
+
+ // Restore FRM if needed.
+ if (SavedFRM.has_value()) {
+ assert(LastFRMChanger && "Expected valid pointer.");
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
+ .addReg(*SavedFRM);
+ MBB.insertAfter(LastFRMChanger, MIB);
+ }
+
+ return Changed;
+}
+
// This function also swaps frm and restores it when encountering an RVV
// floating point instruction with a static rounding mode.
bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) {
@@ -99,8 +207,12 @@ bool RISCVInsertReadWriteCSR::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- for (MachineBasicBlock &MBB : MF)
- Changed |= emitWriteRoundingMode(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ if (DisableFRMInsertOpt)
+ Changed |= emitWriteRoundingMode(MBB);
+ else
+ Changed |= emitWriteRoundingModeOpt(MBB);
+ }
return Changed;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
new file mode 100644
index 000000000000000..c75c35602fbabdc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ i64, i64);
+
+; Test only save/restore frm once.
+define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 0, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test only restore frm once.
+define <vscale x 1 x float> @test2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrmi 1
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 1, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test restoring frm before function call and doing nothing with folling dynamic
+; rounding mode operations.
+declare void @foo()
+define <vscale x 1 x float> @test3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: call foo
+; CHECK-NEXT: vsetvli zero, s0, e32, mf2, ta, ma
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ call void @foo()
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test restoring frm before inline asm and doing nothing with folling dynamic
+; rounding mode operations.
+define <vscale x 1 x float> @test4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ call void asm sideeffect "", ""()
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test restoring frm before reading frmm and doing nothing with folling dynamic
+; rounding mode operations.
+declare i32 @llvm.get.rounding()
+define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: frrm a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: lui a2, 66
+; CHECK-NEXT: addiw a2, a2, 769
+; CHECK-NEXT: srl a0, a2, a0
+; CHECK-NEXT: andi a0, a0, 7
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: sw a0, 0(a1)
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ %rm = call i32 @llvm.get.rounding()
+ store i32 %rm, ptr %p, align 4
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test not set FRM for the two vfadd after WriteFRMImm.
+declare void @llvm.set.rounding(i32)
+define <vscale x 1 x float> @test6(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi 4
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.set.rounding(i32 4)
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 4, i64 %2)
+ %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %a,
+ <vscale x 1 x float> %a,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %b
+}
+
+; Test not set FRM for the vfadd after WriteFRM.
+define <vscale x 1 x float> @test7(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind {
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a0, a0, 32
+; CHECK-NEXT: srli a0, a0, 30
+; CHECK-NEXT: lui a2, 66
+; CHECK-NEXT: addiw a2, a2, 769
+; CHECK-NEXT: srl a0, a2, a0
+; CHECK-NEXT: andi a0, a0, 7
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.set.rounding(i32 %rm)
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %a
+}
+
From bdfe1a8502bd6189d258d2c989bf64e9ae32061c Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 16 Jan 2024 10:44:44 +0800
Subject: [PATCH 2/7] Fix typos and remove wrong comments.
---
llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp | 2 --
llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 12 ++++++------
2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 4d574a588adc469..4b49279bd4808e1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -102,8 +102,6 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
if (!MI.hasRegisterImplicitUseOperand(RISCV::FRM))
return std::nullopt;
- // FIXME: Return nullopt if the rounding mode of MI is not DYN, like
- // FADD_S with RTZ.
return RISCVFPRndMode::DYN;
};
diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
index c75c35602fbabdc..1d7aeb1c3629648 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
@@ -5,7 +5,7 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float>,
<vscale x 1 x float>,
<vscale x 1 x float>,
- i64, i64);
+ i64, i64)
; Test only save/restore frm once.
define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
@@ -56,8 +56,8 @@ entry:
ret <vscale x 1 x float> %b
}
-; Test restoring frm before function call and doing nothing with folling dynamic
-; rounding mode operations.
+; Test restoring frm before function call and doing nothing with following
+; dynamic rounding mode operations.
declare void @foo()
define <vscale x 1 x float> @test3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test3:
@@ -102,7 +102,7 @@ entry:
ret <vscale x 1 x float> %b
}
-; Test restoring frm before inline asm and doing nothing with folling dynamic
+; Test restoring frm before inline asm and doing nothing with following dynamic
; rounding mode operations.
define <vscale x 1 x float> @test4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test4:
@@ -131,8 +131,8 @@ entry:
ret <vscale x 1 x float> %b
}
-; Test restoring frm before reading frmm and doing nothing with folling dynamic
-; rounding mode operations.
+; Test restoring frm before reading frm and doing nothing with following
+; dynamic rounding mode operations.
declare i32 @llvm.get.rounding()
define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind {
; CHECK-LABEL: test5:
From 68ddd7bd76acb572515b4502a5455f2232d67914 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 16 Jan 2024 10:45:56 +0800
Subject: [PATCH 3/7] Run clang-format.
---
.../Target/RISCV/RISCVInsertReadWriteCSR.cpp | 23 ++++++++++---------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 4b49279bd4808e1..898b058bfa3c09c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -23,9 +23,10 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-insert-read-write-csr"
#define RISCV_INSERT_READ_WRITE_CSR_NAME "RISC-V Insert Read/Write CSR Pass"
-static cl::opt<bool> DisableFRMInsertOpt(
- "riscv-disable-frm-insert-opt", cl::init(false), cl::Hidden,
- cl::desc("Disable optimized frm insertion."));
+static cl::opt<bool>
+ DisableFRMInsertOpt("riscv-disable-frm-insert-opt", cl::init(false),
+ cl::Hidden,
+ cl::desc("Disable optimized frm insertion."));
namespace {
@@ -69,7 +70,7 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() == RISCV::SwapFRMImm ||
- MI.getOpcode() == RISCV::WriteFRMImm ) {
+ MI.getOpcode() == RISCV::WriteFRMImm) {
CurrentRM = MI.getOperand(0).getImm();
SavedFRM = std::nullopt;
continue;
@@ -85,7 +86,7 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
// Restore FRM before unknown operations.
if (SavedFRM.has_value())
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
- .addReg(*SavedFRM);
+ .addReg(*SavedFRM);
CurrentRM = RISCVFPRndMode::DYN;
SavedFRM = std::nullopt;
continue;
@@ -125,9 +126,10 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
if (InstrRM == RISCVFPRndMode::DYN) {
if (!SavedFRM.has_value())
continue;
- // SavedFRM not having a value means current FRM has correct rounding mode.
+ // SavedFRM not having a value means current FRM has correct rounding
+ // mode.
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
- .addReg(*SavedFRM);
+ .addReg(*SavedFRM);
SavedFRM = std::nullopt;
CurrentRM = RISCVFPRndMode::DYN;
continue;
@@ -137,13 +139,12 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
// Save current FRM value to SavedFRM.
MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
- *SavedFRM)
- .addImm(*InstrRM);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), *SavedFRM)
+ .addImm(*InstrRM);
} else {
// Don't need to save current FRM when CurrentRM != DYN.
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRMImm))
- .addImm(*InstrRM);
+ .addImm(*InstrRM);
}
CurrentRM = InstrRM;
Changed = true;
From da0489ef841a023fce9cd23d8c8cdcaf2dd554b4 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 18 Jan 2024 13:32:13 +0800
Subject: [PATCH 4/7] Refine code and test.
---
.../Target/RISCV/RISCVInsertReadWriteCSR.cpp | 49 +--
llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 281 +++++++++++++++---
2 files changed, 251 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 898b058bfa3c09c..4c79e4e61ef7c54 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -65,7 +65,7 @@ INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE,
bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
bool Changed = false;
MachineInstr *LastFRMChanger = nullptr;
- std::optional<unsigned> CurrentRM = RISCVFPRndMode::DYN;
+ unsigned CurrentRM = RISCVFPRndMode::DYN;
std::optional<Register> SavedFRM;
for (MachineInstr &MI : MBB) {
@@ -95,56 +95,31 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
assert(!MI.modifiesRegister(RISCV::FRM) &&
"Expected that MI could not modify FRM.");
- auto getInstructionRM = [](MachineInstr &MI) -> std::optional<unsigned> {
- int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc());
- if (FRMIdx >= 0)
- return MI.getOperand(FRMIdx).getImm();
-
- if (!MI.hasRegisterImplicitUseOperand(RISCV::FRM))
- return std::nullopt;
-
- return RISCVFPRndMode::DYN;
- };
-
- std::optional<unsigned> InstrRM = getInstructionRM(MI);
-
- // Skip if MI does not need FRM.
- if (!InstrRM.has_value())
+ int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (FRMIdx < 0)
continue;
+ unsigned InstrRM = MI.getOperand(FRMIdx).getImm();
- if (InstrRM != RISCVFPRndMode::DYN)
- LastFRMChanger = &MI;
+ LastFRMChanger = &MI;
- if (!MI.readsRegister(RISCV::FRM))
- MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
- /*IsImp*/ true));
+ // Make MI implicit use FRM.
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
+ /*IsImp*/ true));
// Skip if MI uses same rounding mode as FRM.
if (InstrRM == CurrentRM)
continue;
- if (InstrRM == RISCVFPRndMode::DYN) {
- if (!SavedFRM.has_value())
- continue;
- // SavedFRM not having a value means current FRM has correct rounding
- // mode.
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
- .addReg(*SavedFRM);
- SavedFRM = std::nullopt;
- CurrentRM = RISCVFPRndMode::DYN;
- continue;
- }
-
- if (CurrentRM == RISCVFPRndMode::DYN) {
+ if (!SavedFRM.has_value()) {
// Save current FRM value to SavedFRM.
MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), *SavedFRM)
- .addImm(*InstrRM);
+ .addImm(InstrRM);
} else {
- // Don't need to save current FRM when CurrentRM != DYN.
+ // Don't need to save current FRM when SavedFRM having value.
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRMImm))
- .addImm(*InstrRM);
+ .addImm(InstrRM);
}
CurrentRM = InstrRM;
Changed = true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
index 1d7aeb1c3629648..baa8907d8384786 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
@@ -56,36 +56,53 @@ entry:
ret <vscale x 1 x float> %b
}
-; Test restoring frm before function call and doing nothing with following
-; dynamic rounding mode operations.
declare void @foo()
-define <vscale x 1 x float> @test3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: test3:
+define <vscale x 1 x float> @just_call(<vscale x 1 x float> %0) nounwind {
+; CHECK-LABEL: just_call:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: call foo
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+entry:
+ call void @foo()
+ ret <vscale x 1 x float> %0
+}
+
+define <vscale x 1 x float> @before_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: before_call1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: addi a1, sp, 32
; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: call foo
-; CHECK-NEXT: vsetvli zero, s0, e32, mf2, ta, ma
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: addi a0, sp, 32
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
@@ -94,27 +111,124 @@ entry:
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void @foo()
- %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @before_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: before_call2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: call foo
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
- <vscale x 1 x float> %a,
- <vscale x 1 x float> %a,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
i64 7, i64 %2)
- ret <vscale x 1 x float> %b
+ call void @foo()
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @after_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_call1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: addi a1, sp, 32
+; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: call foo
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ call void @foo()
+ ret <vscale x 1 x float> %a
}
-; Test restoring frm before inline asm and doing nothing with following dynamic
-; rounding mode operations.
-define <vscale x 1 x float> @test4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: test4:
+define <vscale x 1 x float> @after_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_call2:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: call foo
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 7, i64 %2)
+ call void @foo()
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @just_asm(<vscale x 1 x float> %0) nounwind {
+; CHECK-LABEL: just_asm:
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ret
+entry:
+ call void asm sideeffect "", ""()
+ ret <vscale x 1 x float> %0
+}
+
+define <vscale x 1 x float> @before_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: before_asm1:
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
@@ -123,16 +237,68 @@ entry:
<vscale x 1 x float> %1,
i64 0, i64 %2)
call void asm sideeffect "", ""()
- %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @before_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: before_asm2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
- <vscale x 1 x float> %a,
- <vscale x 1 x float> %a,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
i64 7, i64 %2)
- ret <vscale x 1 x float> %b
+ call void asm sideeffect "", ""()
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @after_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_asm1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 0, i64 %2)
+ call void asm sideeffect "", ""()
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @after_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_asm2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 7, i64 %2)
+ call void asm sideeffect "", ""()
+ ret <vscale x 1 x float> %a
}
; Test restoring frm before reading frm and doing nothing with following
; dynamic rounding mode operations.
+; TODO: The frrm could be elided.
declare i32 @llvm.get.rounding()
define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind {
; CHECK-LABEL: test5:
@@ -166,15 +332,32 @@ entry:
ret <vscale x 1 x float> %b
}
-; Test not set FRM for the two vfadd after WriteFRMImm.
+; Test not set FRM for vfadd with DYN after WriteFRMImm.
declare void @llvm.set.rounding(i32)
-define <vscale x 1 x float> @test6(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: test6:
+define <vscale x 1 x float> @after_fsrm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_fsrm1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi 4
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.set.rounding(i32 4)
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 7, i64 %2)
+ ret <vscale x 1 x float> %a
+}
+
+; Test not set FRM for vfadd with a known rm after WriteFRMImm with same rm.
+define <vscale x 1 x float> @after_fsrm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_fsrm2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fsrmi 4
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
@@ -183,17 +366,32 @@ entry:
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
i64 4, i64 %2)
- %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
+ ret <vscale x 1 x float> %a
+}
+
+; Test not set FRM for vfadd with a known rm after WriteFRMImm with same rm.
+define <vscale x 1 x float> @after_fsrm3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: after_fsrm3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi 4
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: fsrmi a0, 5
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.set.rounding(i32 4)
+ %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
- <vscale x 1 x float> %a,
- <vscale x 1 x float> %a,
- i64 7, i64 %2)
- ret <vscale x 1 x float> %b
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ i64 5, i64 %2)
+ ret <vscale x 1 x float> %a
}
; Test not set FRM for the vfadd after WriteFRM.
-define <vscale x 1 x float> @test7(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind {
-; CHECK-LABEL: test7:
+define <vscale x 1 x float> @after_fsrm4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind {
+; CHECK-LABEL: after_fsrm4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: srli a0, a0, 30
@@ -214,4 +412,3 @@ entry:
i64 7, i64 %2)
ret <vscale x 1 x float> %a
}
-
From 6bceb5366074247810036822461888ef7cd0aa25 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 18 Jan 2024 15:16:23 +0800
Subject: [PATCH 5/7] Set Changed right after adding implicit use of FRM.
---
llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 4c79e4e61ef7c54..7365dbdb1295399 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -105,6 +105,7 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
// Make MI implicit use FRM.
MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
/*IsImp*/ true));
+ Changed = true;
// Skip if MI uses same rounding mode as FRM.
if (InstrRM == CurrentRM)
@@ -122,7 +123,6 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
.addImm(InstrRM);
}
CurrentRM = InstrRM;
- Changed = true;
}
// Restore FRM if needed.
From 5af6ebc5aaf8b6eb6b69ae9a2cdc87ff849eda28 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 23 Jan 2024 18:19:18 +0800
Subject: [PATCH 6/7] Use Register instead of std::optional<Register> for
SavedFRM.
---
.../Target/RISCV/RISCVInsertReadWriteCSR.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 7365dbdb1295399..aac0ecc1cfc9b8e 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -66,29 +66,29 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
bool Changed = false;
MachineInstr *LastFRMChanger = nullptr;
unsigned CurrentRM = RISCVFPRndMode::DYN;
- std::optional<Register> SavedFRM;
+ Register SavedFRM;
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() == RISCV::SwapFRMImm ||
MI.getOpcode() == RISCV::WriteFRMImm) {
CurrentRM = MI.getOperand(0).getImm();
- SavedFRM = std::nullopt;
+ SavedFRM = Register();
continue;
}
if (MI.getOpcode() == RISCV::WriteFRM) {
CurrentRM = RISCVFPRndMode::DYN;
- SavedFRM = std::nullopt;
+ SavedFRM = Register();
continue;
}
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::FRM)) {
// Restore FRM before unknown operations.
- if (SavedFRM.has_value())
+ if (SavedFRM.isValid())
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
- .addReg(*SavedFRM);
+ .addReg(SavedFRM);
CurrentRM = RISCVFPRndMode::DYN;
- SavedFRM = std::nullopt;
+ SavedFRM = Register();
continue;
}
@@ -111,11 +111,11 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
if (InstrRM == CurrentRM)
continue;
- if (!SavedFRM.has_value()) {
+ if (!SavedFRM.isValid()) {
// Save current FRM value to SavedFRM.
MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), *SavedFRM)
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), SavedFRM)
.addImm(InstrRM);
} else {
// Don't need to save current FRM when SavedFRM having value.
@@ -126,11 +126,11 @@ bool RISCVInsertReadWriteCSR::emitWriteRoundingModeOpt(MachineBasicBlock &MBB) {
}
// Restore FRM if needed.
- if (SavedFRM.has_value()) {
+ if (SavedFRM.isValid()) {
assert(LastFRMChanger && "Expected valid pointer.");
MachineInstrBuilder MIB =
BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
- .addReg(*SavedFRM);
+ .addReg(SavedFRM);
MBB.insertAfter(LastFRMChanger, MIB);
}
From 28cf4298fd8877b19c622c05351857d2e539a5c4 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 25 Jan 2024 10:07:18 +0800
Subject: [PATCH 7/7] Add unoptimized cases in
llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
---
llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 227 ++++++++++++++++++++++
1 file changed, 227 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
index baa8907d8384786..cdbc6e8d8d55588 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d \
+; RUN: -riscv-disable-frm-insert-opt < %s | FileCheck %s --check-prefix=UNOPT
declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float>,
@@ -17,6 +19,17 @@ define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float>
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: test:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v8
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -42,6 +55,17 @@ define <vscale x 1 x float> @test2(<vscale x 1 x float> %0, <vscale x 1 x float>
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: test2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: fsrmi a0, 1
+; UNOPT-NEXT: vfadd.vv v8, v8, v8
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -76,6 +100,25 @@ define <vscale x 1 x float> @just_call(<vscale x 1 x float> %0) nounwind {
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: just_call:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: addi sp, sp, -48
+; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: sub sp, sp, a0
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: add sp, sp, a0
+; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; UNOPT-NEXT: addi sp, sp, 48
+; UNOPT-NEXT: ret
entry:
call void @foo()
ret <vscale x 1 x float> %0
@@ -104,6 +147,29 @@ define <vscale x 1 x float> @before_call1(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: before_call1:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: addi sp, sp, -48
+; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; UNOPT-NEXT: csrr a1, vlenb
+; UNOPT-NEXT: slli a1, a1, 1
+; UNOPT-NEXT: sub sp, sp, a1
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: addi a1, sp, 32
+; UNOPT-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: add sp, sp, a0
+; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; UNOPT-NEXT: addi sp, sp, 48
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -135,6 +201,27 @@ define <vscale x 1 x float> @before_call2(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: before_call2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: addi sp, sp, -48
+; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; UNOPT-NEXT: csrr a1, vlenb
+; UNOPT-NEXT: slli a1, a1, 1
+; UNOPT-NEXT: sub sp, sp, a1
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: add sp, sp, a0
+; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; UNOPT-NEXT: addi sp, sp, 48
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -168,6 +255,29 @@ define <vscale x 1 x float> @after_call1(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_call1:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: addi sp, sp, -48
+; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; UNOPT-NEXT: csrr a1, vlenb
+; UNOPT-NEXT: slli a1, a1, 1
+; UNOPT-NEXT: sub sp, sp, a1
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: addi a1, sp, 32
+; UNOPT-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: add sp, sp, a0
+; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; UNOPT-NEXT: addi sp, sp, 48
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -199,6 +309,27 @@ define <vscale x 1 x float> @after_call2(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_call2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: addi sp, sp, -48
+; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; UNOPT-NEXT: csrr a1, vlenb
+; UNOPT-NEXT: slli a1, a1, 1
+; UNOPT-NEXT: sub sp, sp, a1
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: addi a0, sp, 32
+; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; UNOPT-NEXT: csrr a0, vlenb
+; UNOPT-NEXT: slli a0, a0, 1
+; UNOPT-NEXT: add sp, sp, a0
+; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; UNOPT-NEXT: addi sp, sp, 48
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -215,6 +346,12 @@ define <vscale x 1 x float> @just_asm(<vscale x 1 x float> %0) nounwind {
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: just_asm:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: #APP
+; UNOPT-NEXT: #NO_APP
+; UNOPT-NEXT: ret
entry:
call void asm sideeffect "", ""()
ret <vscale x 1 x float> %0
@@ -230,6 +367,16 @@ define <vscale x 1 x float> @before_asm1(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: before_asm1:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: #APP
+; UNOPT-NEXT: #NO_APP
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -248,6 +395,14 @@ define <vscale x 1 x float> @before_asm2(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: before_asm2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: #APP
+; UNOPT-NEXT: #NO_APP
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -268,6 +423,16 @@ define <vscale x 1 x float> @after_asm1(<vscale x 1 x float> %0, <vscale x 1 x f
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_asm1:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: #APP
+; UNOPT-NEXT: #NO_APP
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -286,6 +451,14 @@ define <vscale x 1 x float> @after_asm2(<vscale x 1 x float> %0, <vscale x 1 x f
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_asm2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: #APP
+; UNOPT-NEXT: #NO_APP
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -316,6 +489,22 @@ define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float>
; CHECK-NEXT: vfadd.vv v8, v8, v8
; CHECK-NEXT: sw a0, 0(a1)
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: test5:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 0
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: frrm a0
+; UNOPT-NEXT: slli a0, a0, 2
+; UNOPT-NEXT: lui a2, 66
+; UNOPT-NEXT: addiw a2, a2, 769
+; UNOPT-NEXT: srl a0, a2, a0
+; UNOPT-NEXT: andi a0, a0, 7
+; UNOPT-NEXT: vfadd.vv v8, v8, v8
+; UNOPT-NEXT: sw a0, 0(a1)
+; UNOPT-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
<vscale x 1 x float> undef,
@@ -341,6 +530,13 @@ define <vscale x 1 x float> @after_fsrm1(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_fsrm1:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: fsrmi 4
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
@@ -359,6 +555,15 @@ define <vscale x 1 x float> @after_fsrm2(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_fsrm2:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: fsrmi 4
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 4
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
@@ -379,6 +584,15 @@ define <vscale x 1 x float> @after_fsrm3(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_fsrm3:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: fsrmi 4
+; UNOPT-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; UNOPT-NEXT: fsrmi a0, 5
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 4)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
@@ -403,6 +617,19 @@ define <vscale x 1 x float> @after_fsrm4(<vscale x 1 x float> %0, <vscale x 1 x
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
+;
+; UNOPT-LABEL: after_fsrm4:
+; UNOPT: # %bb.0: # %entry
+; UNOPT-NEXT: slli a0, a0, 32
+; UNOPT-NEXT: srli a0, a0, 30
+; UNOPT-NEXT: lui a2, 66
+; UNOPT-NEXT: addiw a2, a2, 769
+; UNOPT-NEXT: srl a0, a2, a0
+; UNOPT-NEXT: andi a0, a0, 7
+; UNOPT-NEXT: fsrm a0
+; UNOPT-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; UNOPT-NEXT: vfadd.vv v8, v8, v9
+; UNOPT-NEXT: ret
entry:
call void @llvm.set.rounding(i32 %rm)
%a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(