[llvm] [RISCV][GISel] Fold G_FCONSTANT 0.0 store into G_CONSTANT x0 (PR #163008)
Shaoce SUN via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 20:14:54 PDT 2025
https://github.com/sunshaoce updated https://github.com/llvm/llvm-project/pull/163008
>From 9225aa95ba8e05a60d1edd0b5e5310c5e7dc552b Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Sun, 12 Oct 2025 00:37:22 +0800
Subject: [PATCH 1/7] pre-commit
---
.../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 44 +++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
new file mode 100644
index 0000000000000..4939fe11c5394
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=RV64
+
+define void @zero_f32(ptr %i) {
+; RV32-LABEL: zero_f32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x fa5, zero
+; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_f32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x fa5, zero
+; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store float 0.000000e+00, ptr %i, align 4
+ ret void
+}
+
+
+define void @zero_f64(ptr %i) {
+; RV32-LABEL: zero_f64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.LCPI1_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI1_0)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_f64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.d.x fa5, zero
+; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store double 0.000000e+00, ptr %i, align 8
+ ret void
+}
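
For reference, the pattern that the later patches rewrite is already visible at the generic MachineIR level. A rough sketch of what `store float 0.0, ptr %i` looks like before instruction selection on riscv64 (virtual register numbers and memory-operand details are illustrative, not copied from an actual dump):

  %0:_(p0) = COPY $x10
  %1:_(s32) = G_FCONSTANT float 0.000000e+00
  G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.i)

Without a fold, the selector materializes the G_FCONSTANT into an FPR, which is where the `fmv.w.x fa5, zero` + `fsw` pairs in the checks above come from.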
>From 9b25edd6384d722e1980fff61d51c0999d7b9d62 Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Sun, 12 Oct 2025 00:39:15 +0800
Subject: [PATCH 2/7] [RISCV][GISel] Fold `G_FCONSTANT` 0.0 store into `sw x0`
---
.../Target/RISCV/GISel/RISCVInstructionSelector.cpp | 13 +++++++++++++
.../CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll | 9 +++------
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 53633eac3d2c3..54050242b1854 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -1049,6 +1049,19 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI,
MRI->setType(DstReg, sXLen);
break;
}
+ case TargetOpcode::G_STORE: {
+ Register SrcReg = MI.getOperand(0).getReg();
+ MachineInstr *Def = MRI->getVRegDef(SrcReg);
+ if (Def && Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
+ if (Def->getOperand(1).getFPImm()->getValueAPF().isPosZero()) {
+ MI.getOperand(0).setReg(RISCV::X0);
+
+ if (MRI->use_nodbg_empty(SrcReg))
+ Def->eraseFromParent();
+ }
+ }
+ break;
+ }
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
index 4939fe11c5394..d9a6e4b5ec53a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -7,14 +7,12 @@
define void @zero_f32(ptr %i) {
; RV32-LABEL: zero_f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x fa5, zero
-; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x fa5, zero
-; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store float 0.000000e+00, ptr %i, align 4
@@ -35,8 +33,7 @@ define void @zero_f64(ptr %i) {
;
; RV64-LABEL: zero_f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.d.x fa5, zero
-; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store double 0.000000e+00, ptr %i, align 8
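
A note on why the match is restricted to +0.0 (the isPosZero() check): storing x0 writes an all-zero byte pattern, which is exactly the IEEE-754 encoding of +0.0, while -0.0 has its sign bit set and cannot be stored this way. A small standalone program checking that bit-pattern assumption (illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    double PosZero = +0.0, NegZero = -0.0;
    uint64_t PosBits, NegBits;
    std::memcpy(&PosBits, &PosZero, sizeof(double));
    std::memcpy(&NegBits, &NegZero, sizeof(double));
    // +0.0: all bits clear, identical to what a store of x0 writes.
    assert(PosBits == 0x0000000000000000ULL);
    // -0.0: only the sign bit set, so storing x0 would change the value.
    assert(NegBits == 0x8000000000000000ULL);
    return 0;
  }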
>From 74e65af91c4ce352b22faebc8030b46e00d325e4 Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Sun, 12 Oct 2025 18:15:26 +0800
Subject: [PATCH 3/7] only add tests
---
.../RISCV/GISel/RISCVInstructionSelector.cpp | 13 --
.../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 189 +++++++++++++++++-
2 files changed, 180 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 54050242b1854..53633eac3d2c3 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -1049,19 +1049,6 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI,
MRI->setType(DstReg, sXLen);
break;
}
- case TargetOpcode::G_STORE: {
- Register SrcReg = MI.getOperand(0).getReg();
- MachineInstr *Def = MRI->getVRegDef(SrcReg);
- if (Def && Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
- if (Def->getOperand(1).getFPImm()->getValueAPF().isPosZero()) {
- MI.getOperand(0).setReg(RISCV::X0);
-
- if (MRI->use_nodbg_empty(SrcReg))
- Def->eraseFromParent();
- }
- }
- break;
- }
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
index d9a6e4b5ec53a..37a026ef06821 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -1,21 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN: llc -global-isel -mtriple=riscv32 -global-isel -mattr=+f,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: llc -global-isel -mtriple=riscv64 -global-isel -mattr=+d,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV64
+define void @zero_f16(ptr %i) {
+; RV32-LABEL: zero_f16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.h.x fa5, zero
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_f16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.h.x fa5, zero
+; RV64-NEXT: fsh fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store half 0.0, ptr %i, align 4
+ ret void
+}
+
+define void @zero_bf16(ptr %i) {
+; RV32-LABEL: zero_bf16:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.h.x fa5, zero
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_bf16:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.h.x fa5, zero
+; RV64-NEXT: fsh fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store bfloat 0.0, ptr %i, align 4
+ ret void
+}
+
define void @zero_f32(ptr %i) {
; RV32-LABEL: zero_f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: fmv.w.x fa5, zero
+; RV32-NEXT: fsw fa5, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: sw zero, 0(a0)
+; RV64-NEXT: fmv.w.x fa5, zero
+; RV64-NEXT: fsw fa5, 0(a0)
; RV64-NEXT: ret
entry:
- store float 0.000000e+00, ptr %i, align 4
+ store float 0.0, ptr %i, align 4
ret void
}
@@ -23,8 +59,8 @@ entry:
define void @zero_f64(ptr %i) {
; RV32-LABEL: zero_f64:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.LCPI1_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI1_0)
+; RV32-NEXT: lui a1, %hi(.LCPI3_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI3_0)
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: sw a2, 0(a0)
@@ -33,9 +69,144 @@ define void @zero_f64(ptr %i) {
;
; RV64-LABEL: zero_f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: fmv.d.x fa5, zero
+; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store double 0.0, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v1f32(ptr %i) {
+; RV32-LABEL: zero_v1f32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x fa5, zero
+; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v1f32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x fa5, zero
+; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store <1 x float> <float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v2f32(ptr %i) {
+; RV32-LABEL: zero_v2f32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x fa5, zero
+; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: fsw fa5, 4(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v2f32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x fa5, zero
+; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: fsw fa5, 4(a0)
+; RV64-NEXT: ret
+entry:
+ store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v4f32(ptr %i) {
+; RV32-LABEL: zero_v4f32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x fa5, zero
+; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: fsw fa5, 4(a0)
+; RV32-NEXT: fsw fa5, 8(a0)
+; RV32-NEXT: fsw fa5, 12(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v4f32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x fa5, zero
+; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: fsw fa5, 4(a0)
+; RV64-NEXT: fsw fa5, 8(a0)
+; RV64-NEXT: fsw fa5, 12(a0)
+; RV64-NEXT: ret
+entry:
+ store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v1f64(ptr %i) {
+; RV32-LABEL: zero_v1f64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.LCPI7_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v1f64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.d.x fa5, zero
+; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: ret
+entry:
+ store <1 x double> <double 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v2f64(ptr %i) {
+; RV32-LABEL: zero_v2f64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.LCPI8_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
+; RV32-NEXT: sw a2, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v2f64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.d.x fa5, zero
+; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: fsd fa5, 8(a0)
+; RV64-NEXT: ret
+entry:
+ store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v4f64(ptr %i) {
+; RV32-LABEL: zero_v4f64:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.LCPI9_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI9_0)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
+; RV32-NEXT: sw a2, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: sw a2, 16(a0)
+; RV32-NEXT: sw a1, 20(a0)
+; RV32-NEXT: sw a2, 24(a0)
+; RV32-NEXT: sw a1, 28(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: zero_v4f64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.d.x fa5, zero
+; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: fsd fa5, 8(a0)
+; RV64-NEXT: fsd fa5, 16(a0)
+; RV64-NEXT: fsd fa5, 24(a0)
; RV64-NEXT: ret
entry:
- store double 0.000000e+00, ptr %i, align 8
+ store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
ret void
}
>From 1946132eda359ae8f5d6f7c3574e918f03be251e Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Sun, 12 Oct 2025 18:31:15 +0800
Subject: [PATCH 4/7] remove duplicate option
---
llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
index 37a026ef06821..52bcd653a8480 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=riscv32 -global-isel -mattr=+f,+zfh < %s \
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV32
-; RUN: llc -global-isel -mtriple=riscv64 -global-isel -mattr=+d,+zfh < %s \
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \
; RUN: | FileCheck %s --check-prefix=RV64
define void @zero_f16(ptr %i) {
>From 1dbeb86852a38789a1338690d3ad8e486f0e99ee Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Mon, 13 Oct 2025 00:31:35 +0800
Subject: [PATCH 5/7] update
---
.../GISel/RISCVPostLegalizerCombiner.cpp | 58 ++++++++++++++-
.../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 72 ++++++++-----------
2 files changed, 85 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
index 67b510dc80f1e..802acc8ff239c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/FormatVariadic.h"
#define GET_GICOMBINER_DEPS
#include "RISCVGenPostLegalizeGICombiner.inc"
@@ -98,6 +99,8 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI);
+
private:
RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
};
@@ -122,6 +125,54 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
report_fatal_error("Invalid rule identifier");
}
+/// Try to fold:
+/// G_STORE (G_FCONSTANT +0.0), addr
+/// into:
+/// G_STORE (G_CONSTANT 0 [XLEN]), addr
+bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF,
+ const RISCVSubtarget &STI) {
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.getOpcode() != TargetOpcode::G_STORE)
+ continue;
+
+ Register SrcReg = MI.getOperand(0).getReg();
+ if (!SrcReg.isVirtual())
+ continue;
+
+ MachineInstr *Def = MRI.getVRegDef(SrcReg);
+ if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
+ continue;
+
+ auto *CFP = Def->getOperand(1).getFPImm();
+ if (!CFP || !CFP->getValueAPF().isPosZero())
+ continue;
+
+ // Use XLEN-wide integer zero
+ MachineIRBuilder MIB(MI);
+ const unsigned XLen = STI.getXLen();
+ auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0);
+ MI.getOperand(0).setReg(Zero.getReg(0));
+
+ LLT ValTy = MRI.getType(SrcReg);
+ if (MRI.use_nodbg_empty(SrcReg))
+ Def->eraseFromParent();
+
+ [[maybe_unused]] unsigned ValBits = ValTy.getSizeInBits();
+ LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
+ "(XLEN={1}, ValBits={2}) : \n\t{3}\n",
+ DEBUG_TYPE, XLen, ValBits, MI));
+
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasFailedISel())
return false;
@@ -147,7 +198,12 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
F.hasMinSize());
RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
ST, MDT, LI);
- return Impl.combineMachineInstrs();
+
+ bool TableCombChanged = Impl.combineMachineInstrs();
+
+ bool LocalChanged = combineFPZeroStore(MF, ST);
+
+ return TableCombChanged || LocalChanged;
}
char RISCVPostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
index 52bcd653a8480..1323bfc1aefbc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -7,14 +7,12 @@
define void @zero_f16(ptr %i) {
; RV32-LABEL: zero_f16:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.h.x fa5, zero
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f16:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.h.x fa5, zero
-; RV64-NEXT: fsh fa5, 0(a0)
+; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store half 0.0, ptr %i, align 4
@@ -24,14 +22,12 @@ entry:
define void @zero_bf16(ptr %i) {
; RV32-LABEL: zero_bf16:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.h.x fa5, zero
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: sh zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_bf16:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.h.x fa5, zero
-; RV64-NEXT: fsh fa5, 0(a0)
+; RV64-NEXT: sh zero, 0(a0)
; RV64-NEXT: ret
entry:
store bfloat 0.0, ptr %i, align 4
@@ -41,14 +37,12 @@ entry:
define void @zero_f32(ptr %i) {
; RV32-LABEL: zero_f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x fa5, zero
-; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x fa5, zero
-; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store float 0.0, ptr %i, align 4
@@ -69,8 +63,7 @@ define void @zero_f64(ptr %i) {
;
; RV64-LABEL: zero_f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.d.x fa5, zero
-; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store double 0.0, ptr %i, align 8
@@ -80,14 +73,12 @@ entry:
define void @zero_v1f32(ptr %i) {
; RV32-LABEL: zero_v1f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x fa5, zero
-; RV32-NEXT: fsw fa5, 0(a0)
+; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v1f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x fa5, zero
-; RV64-NEXT: fsw fa5, 0(a0)
+; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x float> <float 0.0>, ptr %i, align 8
@@ -97,16 +88,14 @@ entry:
define void @zero_v2f32(ptr %i) {
; RV32-LABEL: zero_v2f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x fa5, zero
-; RV32-NEXT: fsw fa5, 0(a0)
-; RV32-NEXT: fsw fa5, 4(a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v2f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x fa5, zero
-; RV64-NEXT: fsw fa5, 0(a0)
-; RV64-NEXT: fsw fa5, 4(a0)
+; RV64-NEXT: sw zero, 0(a0)
+; RV64-NEXT: sw zero, 4(a0)
; RV64-NEXT: ret
entry:
store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
@@ -116,20 +105,18 @@ entry:
define void @zero_v4f32(ptr %i) {
; RV32-LABEL: zero_v4f32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x fa5, zero
-; RV32-NEXT: fsw fa5, 0(a0)
-; RV32-NEXT: fsw fa5, 4(a0)
-; RV32-NEXT: fsw fa5, 8(a0)
-; RV32-NEXT: fsw fa5, 12(a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
+; RV32-NEXT: sw zero, 8(a0)
+; RV32-NEXT: sw zero, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: zero_v4f32:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x fa5, zero
-; RV64-NEXT: fsw fa5, 0(a0)
-; RV64-NEXT: fsw fa5, 4(a0)
-; RV64-NEXT: fsw fa5, 8(a0)
-; RV64-NEXT: fsw fa5, 12(a0)
+; RV64-NEXT: sw zero, 0(a0)
+; RV64-NEXT: sw zero, 4(a0)
+; RV64-NEXT: sw zero, 8(a0)
+; RV64-NEXT: sw zero, 12(a0)
; RV64-NEXT: ret
entry:
store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
@@ -149,8 +136,7 @@ define void @zero_v1f64(ptr %i) {
;
; RV64-LABEL: zero_v1f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.d.x fa5, zero
-; RV64-NEXT: fsd fa5, 0(a0)
+; RV64-NEXT: sd zero, 0(a0)
; RV64-NEXT: ret
entry:
store <1 x double> <double 0.0>, ptr %i, align 8
@@ -172,9 +158,8 @@ define void @zero_v2f64(ptr %i) {
;
; RV64-LABEL: zero_v2f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.d.x fa5, zero
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: fsd fa5, 8(a0)
+; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
entry:
store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
@@ -200,11 +185,10 @@ define void @zero_v4f64(ptr %i) {
;
; RV64-LABEL: zero_v4f64:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.d.x fa5, zero
-; RV64-NEXT: fsd fa5, 0(a0)
-; RV64-NEXT: fsd fa5, 8(a0)
-; RV64-NEXT: fsd fa5, 16(a0)
-; RV64-NEXT: fsd fa5, 24(a0)
+; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sd zero, 8(a0)
+; RV64-NEXT: sd zero, 16(a0)
+; RV64-NEXT: sd zero, 24(a0)
; RV64-NEXT: ret
entry:
store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
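
At the MIR level, the intent of the new combine is roughly the following, shown for the f64 case on riscv64 where XLEN matches the stored size (a hand-written sketch, not a -debug dump):

  ; before
  %1:_(s64) = G_FCONSTANT double 0.000000e+00
  G_STORE %1(s64), %0(p0) :: (store (s64) into %ir.i)

  ; after
  %2:_(s64) = G_CONSTANT i64 0
  G_STORE %2(s64), %0(p0) :: (store (s64) into %ir.i)

The integer zero then selects to x0, so the store becomes a plain `sd zero, 0(a0)` with no FPR involved, matching the updated checks above.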
>From 2d948587b93bf9e68ed8a32f19078a624bfb038e Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Tue, 14 Oct 2025 21:21:41 +0800
Subject: [PATCH 6/7] Use tablegen
---
.../GISel/RISCVPostLegalizerCombiner.cpp | 101 ++++++++----------
llvm/lib/Target/RISCV/RISCVCombine.td | 11 +-
2 files changed, 55 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
index 802acc8ff239c..dc2ded4064e06 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -43,6 +43,50 @@ namespace {
#include "RISCVGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
+/// Match: G_STORE (G_FCONSTANT +0.0), addr
+/// Return the source vreg in MatchInfo if matched.
+bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &MatchInfo) {
+ if (MI.getOpcode() != TargetOpcode::G_STORE)
+ return false;
+
+ Register SrcReg = MI.getOperand(0).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+
+ MachineInstr *Def = MRI.getVRegDef(SrcReg);
+ if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+
+ auto *CFP = Def->getOperand(1).getFPImm();
+ if (!CFP || !CFP->getValueAPF().isPosZero())
+ return false;
+
+ MatchInfo = SrcReg;
+ return true;
+}
+
+/// Apply: rewrite to G_STORE (G_CONSTANT 0 [XLEN]), addr
+void applyFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, const RISCVSubtarget &STI,
+ Register &MatchInfo) {
+ const unsigned XLen = STI.getXLen();
+
+ auto Zero = B.buildConstant(LLT::scalar(XLen), 0);
+ MI.getOperand(0).setReg(Zero.getReg(0));
+
+ MachineInstr *Def = MRI.getVRegDef(MatchInfo);
+ if (Def && MRI.use_nodbg_empty(MatchInfo))
+ Def->eraseFromParent();
+
+#ifndef NDEBUG
+ unsigned ValBits = MRI.getType(MatchInfo).getSizeInBits();
+ LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
+ "(XLEN={1}, ValBits={2}):\n {3}\n",
+ DEBUG_TYPE, XLen, ValBits, MI));
+#endif
+}
+
class RISCVPostLegalizerCombinerImpl : public Combiner {
protected:
const CombinerHelper Helper;
@@ -99,8 +143,6 @@ class RISCVPostLegalizerCombiner : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool combineFPZeroStore(MachineFunction &MF, const RISCVSubtarget &STI);
-
private:
RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
};
@@ -125,54 +167,6 @@ RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
report_fatal_error("Invalid rule identifier");
}
-/// Try to fold:
-/// G_STORE (G_FCONSTANT +0.0), addr
-/// into:
-/// G_STORE (G_CONSTANT 0 [XLEN]), addr
-bool RISCVPostLegalizerCombiner::combineFPZeroStore(MachineFunction &MF,
- const RISCVSubtarget &STI) {
- bool Changed = false;
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- if (MI.getOpcode() != TargetOpcode::G_STORE)
- continue;
-
- Register SrcReg = MI.getOperand(0).getReg();
- if (!SrcReg.isVirtual())
- continue;
-
- MachineInstr *Def = MRI.getVRegDef(SrcReg);
- if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
- continue;
-
- auto *CFP = Def->getOperand(1).getFPImm();
- if (!CFP || !CFP->getValueAPF().isPosZero())
- continue;
-
- // Use XLEN-wide integer zero
- MachineIRBuilder MIB(MI);
- const unsigned XLen = STI.getXLen();
- auto Zero = MIB.buildConstant(LLT::scalar(XLen), 0);
- MI.getOperand(0).setReg(Zero.getReg(0));
-
- LLT ValTy = MRI.getType(SrcReg);
- if (MRI.use_nodbg_empty(SrcReg))
- Def->eraseFromParent();
-
- [[maybe_unused]] unsigned ValBits = ValTy.getSizeInBits();
- LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
- "(XLEN={1}, ValBits={2}) : \n\t{3}\n",
- DEBUG_TYPE, XLen, ValBits, MI));
-
- Changed = true;
- }
- }
-
- return Changed;
-}
-
bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasFailedISel())
return false;
@@ -198,12 +192,7 @@ bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
F.hasMinSize());
RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
ST, MDT, LI);
-
- bool TableCombChanged = Impl.combineMachineInstrs();
-
- bool LocalChanged = combineFPZeroStore(MF, ST);
-
- return TableCombChanged || LocalChanged;
+ return Impl.combineMachineInstrs();
}
char RISCVPostLegalizerCombiner::ID = 0;
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 995dd0c5d82eb..75b71b580cbbe 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -19,11 +19,20 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
"RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
}
+// Rule: fold store (fp +0.0) -> store (int zero [XLEN])
+def fp_zero_store_matchdata : GIDefMatchData<"Register">;
+def fold_fp_zero_store : GICombineRule<
+ (defs root:$root, fp_zero_store_matchdata:$matchinfo),
+ (match (G_STORE $src, $addr):$root,
+ [{ return matchFoldFPZeroStore(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>;
+
// Post-legalization combines which are primarily optimizations.
// TODO: Add more combines.
def RISCVPostLegalizerCombiner
: GICombiner<"RISCVPostLegalizerCombinerImpl",
[sub_to_add, combines_for_extload, redundant_and,
identity_combines, shift_immed_chain,
- commute_constant_to_rhs, simplify_neg_minmax]> {
+ commute_constant_to_rhs, simplify_neg_minmax,
+ fold_fp_zero_store]> {
}
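
If the rule ever needs a dedicated regression test, a MIR test along these lines should exercise the combine in isolation (a sketch only: it assumes the pass is registered under its DEBUG_TYPE, riscv-postlegalizer-combiner, and the CHECK lines would need to be regenerated with update_mir_test_checks.py):

  # RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=riscv-postlegalizer-combiner \
  # RUN:   -verify-machineinstrs %s -o - | FileCheck %s
  ---
  name:            zero_f64
  legalized:       true
  tracksRegLiveness: true
  body:             |
    bb.0:
      liveins: $x10
      ; CHECK-LABEL: name: zero_f64
      ; CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
      ; CHECK: G_STORE [[ZERO]](s64), {{%[0-9]+}}(p0)
      %0:_(p0) = COPY $x10
      %1:_(s64) = G_FCONSTANT double 0.000000e+00
      G_STORE %1(s64), %0(p0) :: (store (s64))
      PseudoRET
  ...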
>From cb4c37d975ef735bb91204df55f07c8998b7d0e1 Mon Sep 17 00:00:00 2001
From: Shaoce SUN <sunshaoce at outlook.com>
Date: Wed, 15 Oct 2025 11:14:33 +0800
Subject: [PATCH 7/7] add test for f64 on rv32
---
.../GISel/RISCVPostLegalizerCombiner.cpp | 8 +-
llvm/lib/Target/RISCV/RISCVCombine.td | 2 +-
.../RISCV/GlobalISel/store-fp-zero-to-x0.ll | 388 ++++++++++++------
3 files changed, 264 insertions(+), 134 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
index dc2ded4064e06..f2b216be1db15 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -46,7 +46,7 @@ namespace {
/// Match: G_STORE (G_FCONSTANT +0.0), addr
/// Return the source vreg in MatchInfo if matched.
bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
- Register &MatchInfo) {
+ const RISCVSubtarget &STI, Register &MatchInfo) {
if (MI.getOpcode() != TargetOpcode::G_STORE)
return false;
@@ -62,6 +62,12 @@ bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
if (!CFP || !CFP->getValueAPF().isPosZero())
return false;
+ unsigned ValBits = MRI.getType(SrcReg).getSizeInBits();
+ if ((ValBits == 16 && !STI.hasStdExtZfh()) ||
+ (ValBits == 32 && !STI.hasStdExtF()) ||
+ (ValBits == 64 && (!STI.hasStdExtD() || !STI.is64Bit())))
+ return false;
+
MatchInfo = SrcReg;
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 75b71b580cbbe..a06b60d8cce07 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -24,7 +24,7 @@ def fp_zero_store_matchdata : GIDefMatchData<"Register">;
def fold_fp_zero_store : GICombineRule<
(defs root:$root, fp_zero_store_matchdata:$matchinfo),
(match (G_STORE $src, $addr):$root,
- [{ return matchFoldFPZeroStore(*${root}, MRI, ${matchinfo}); }]),
+ [{ return matchFoldFPZeroStore(*${root}, MRI, STI, ${matchinfo}); }]),
(apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>;
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
index 1323bfc1aefbc..bc79c6f650291 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -1,49 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \
-; RUN: | FileCheck %s --check-prefix=RV32
+; RUN: | FileCheck %s --check-prefix=RV32F
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+d,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV32D
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+f,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV64F
; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \
-; RUN: | FileCheck %s --check-prefix=RV64
+; RUN: | FileCheck %s --check-prefix=RV64D
define void @zero_f16(ptr %i) {
-; RV32-LABEL: zero_f16:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sh zero, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_f16:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sh zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_f16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store half 0.0, ptr %i, align 4
ret void
}
define void @zero_bf16(ptr %i) {
-; RV32-LABEL: zero_bf16:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sh zero, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_bf16:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sh zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_bf16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_bf16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_bf16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_bf16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store bfloat 0.0, ptr %i, align 4
ret void
}
define void @zero_f32(ptr %i) {
-; RV32-LABEL: zero_f32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sw zero, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_f32:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sw zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store float 0.0, ptr %i, align 4
ret void
@@ -51,145 +85,235 @@ entry:
define void @zero_f64(ptr %i) {
-; RV32-LABEL: zero_f64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.LCPI3_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI3_0)
-; RV32-NEXT: lw a2, 0(a1)
-; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_f64:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sd zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI3_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI3_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store double 0.0, ptr %i, align 8
ret void
}
define void @zero_v1f32(ptr %i) {
-; RV32-LABEL: zero_v1f32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sw zero, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v1f32:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sw zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v1f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store <1 x float> <float 0.0>, ptr %i, align 8
ret void
}
define void @zero_v2f32(ptr %i) {
-; RV32-LABEL: zero_v2f32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sw zero, 0(a0)
-; RV32-NEXT: sw zero, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v2f32:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sw zero, 0(a0)
-; RV64-NEXT: sw zero, 4(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v2f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: ret
entry:
store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
ret void
}
define void @zero_v4f32(ptr %i) {
-; RV32-LABEL: zero_v4f32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: sw zero, 0(a0)
-; RV32-NEXT: sw zero, 4(a0)
-; RV32-NEXT: sw zero, 8(a0)
-; RV32-NEXT: sw zero, 12(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v4f32:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sw zero, 0(a0)
-; RV64-NEXT: sw zero, 4(a0)
-; RV64-NEXT: sw zero, 8(a0)
-; RV64-NEXT: sw zero, 12(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v4f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: sw zero, 8(a0)
+; RV32F-NEXT: sw zero, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: sw zero, 8(a0)
+; RV32D-NEXT: sw zero, 12(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: sw zero, 8(a0)
+; RV64F-NEXT: sw zero, 12(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: sw zero, 8(a0)
+; RV64D-NEXT: sw zero, 12(a0)
+; RV64D-NEXT: ret
entry:
store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
ret void
}
define void @zero_v1f64(ptr %i) {
-; RV32-LABEL: zero_v1f64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.LCPI7_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI7_0)
-; RV32-NEXT: lw a2, 0(a1)
-; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v1f64:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sd zero, 0(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v1f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI7_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
entry:
store <1 x double> <double 0.0>, ptr %i, align 8
ret void
}
define void @zero_v2f64(ptr %i) {
-; RV32-LABEL: zero_v2f64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.LCPI8_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0)
-; RV32-NEXT: lw a2, 0(a1)
-; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: sw a2, 8(a0)
-; RV32-NEXT: sw a1, 12(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v2f64:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sd zero, 0(a0)
-; RV64-NEXT: sd zero, 8(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v2f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI8_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: ret
entry:
store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
ret void
}
define void @zero_v4f64(ptr %i) {
-; RV32-LABEL: zero_v4f64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.LCPI9_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI9_0)
-; RV32-NEXT: lw a2, 0(a1)
-; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 4(a0)
-; RV32-NEXT: sw a2, 8(a0)
-; RV32-NEXT: sw a1, 12(a0)
-; RV32-NEXT: sw a2, 16(a0)
-; RV32-NEXT: sw a1, 20(a0)
-; RV32-NEXT: sw a2, 24(a0)
-; RV32-NEXT: sw a1, 28(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: zero_v4f64:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sd zero, 0(a0)
-; RV64-NEXT: sd zero, 8(a0)
-; RV64-NEXT: sd zero, 16(a0)
-; RV64-NEXT: sd zero, 24(a0)
-; RV64-NEXT: ret
+; RV32F-LABEL: zero_v4f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI9_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI9_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: sw a2, 16(a0)
+; RV32F-NEXT: sw a1, 20(a0)
+; RV32F-NEXT: sw a2, 24(a0)
+; RV32F-NEXT: sw a1, 28(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: fsd fa5, 16(a0)
+; RV32D-NEXT: fsd fa5, 24(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: sd zero, 16(a0)
+; RV64F-NEXT: sd zero, 24(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: sd zero, 16(a0)
+; RV64D-NEXT: sd zero, 24(a0)
+; RV64D-NEXT: ret
entry:
store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
ret void