[llvm] 01f2955 - [RISCV][GISel] Fold G_FCONSTANT 0.0 store into G_CONSTANT x0 (#163008)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 00:50:08 PDT 2025
Author: Shaoce SUN
Date: 2025-10-23T15:50:04+08:00
New Revision: 01f29552d87ba70d876e76fdedea839a2f2b2a2a
URL: https://github.com/llvm/llvm-project/commit/01f29552d87ba70d876e76fdedea839a2f2b2a2a
DIFF: https://github.com/llvm/llvm-project/commit/01f29552d87ba70d876e76fdedea839a2f2b2a2a.diff
LOG: [RISCV][GISel] Fold G_FCONSTANT 0.0 store into G_CONSTANT x0 (#163008)
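Fold a store of floating-point +0.0 into a store of the integer constant 0 so that
instruction selection can write the zero register (x0) directly instead of
materializing an FP zero. A rough gMIR sketch of the rewrite (illustrative only;
assumes RV32 with a 32-bit float store, and the virtual register names are made up):

  %val:_(s32) = G_FCONSTANT float 0.000000e+00
  G_STORE %val(s32), %ptr(p0) :: (store (s32))

becomes

  %zero:_(s32) = G_CONSTANT i32 0
  G_STORE %zero(s32), %ptr(p0) :: (store (s32))

The fold only fires when the FP extension matching the value width is available
(Zfh for 16-bit, F for 32-bit, D plus RV64 for 64-bit), as checked in
matchFoldFPZeroStore below.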
Added:
llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
Modified:
llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
llvm/lib/Target/RISCV/RISCVCombine.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
index 67b510dc80f1e..f2b216be1db15 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/FormatVariadic.h"
#define GET_GICOMBINER_DEPS
#include "RISCVGenPostLegalizeGICombiner.inc"
@@ -42,6 +43,56 @@ namespace {
#include "RISCVGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
+/// Match: G_STORE (G_FCONSTANT +0.0), addr
+/// Return the source vreg in MatchInfo if matched.
+bool matchFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const RISCVSubtarget &STI, Register &MatchInfo) {
+ if (MI.getOpcode() != TargetOpcode::G_STORE)
+ return false;
+
+ Register SrcReg = MI.getOperand(0).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+
+ MachineInstr *Def = MRI.getVRegDef(SrcReg);
+ if (!Def || Def->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+
+ auto *CFP = Def->getOperand(1).getFPImm();
+ if (!CFP || !CFP->getValueAPF().isPosZero())
+ return false;
+
+ unsigned ValBits = MRI.getType(SrcReg).getSizeInBits();
+ if ((ValBits == 16 && !STI.hasStdExtZfh()) ||
+ (ValBits == 32 && !STI.hasStdExtF()) ||
+ (ValBits == 64 && (!STI.hasStdExtD() || !STI.is64Bit())))
+ return false;
+
+ MatchInfo = SrcReg;
+ return true;
+}
+
+/// Apply: rewrite to G_STORE (G_CONSTANT 0 [XLEN]), addr
+void applyFoldFPZeroStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, const RISCVSubtarget &STI,
+ Register &MatchInfo) {
+ const unsigned XLen = STI.getXLen();
+
+ auto Zero = B.buildConstant(LLT::scalar(XLen), 0);
+ MI.getOperand(0).setReg(Zero.getReg(0));
+
+ MachineInstr *Def = MRI.getVRegDef(MatchInfo);
+ if (Def && MRI.use_nodbg_empty(MatchInfo))
+ Def->eraseFromParent();
+
+#ifndef NDEBUG
+ unsigned ValBits = MRI.getType(MatchInfo).getSizeInBits();
+ LLVM_DEBUG(dbgs() << formatv("[{0}] Fold FP zero store -> int zero "
+ "(XLEN={1}, ValBits={2}):\n {3}\n",
+ DEBUG_TYPE, XLen, ValBits, MI));
+#endif
+}
+
class RISCVPostLegalizerCombinerImpl : public Combiner {
protected:
const CombinerHelper Helper;
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 995dd0c5d82eb..a06b60d8cce07 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -19,11 +19,20 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
"RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
}
+// Rule: fold store (fp +0.0) -> store (int zero [XLEN])
+def fp_zero_store_matchdata : GIDefMatchData<"Register">;
+def fold_fp_zero_store : GICombineRule<
+ (defs root:$root, fp_zero_store_matchdata:$matchinfo),
+ (match (G_STORE $src, $addr):$root,
+ [{ return matchFoldFPZeroStore(*${root}, MRI, STI, ${matchinfo}); }]),
+ (apply [{ applyFoldFPZeroStore(*${root}, MRI, B, STI, ${matchinfo}); }])>;
+
// Post-legalization combines which are primarily optimizations.
// TODO: Add more combines.
def RISCVPostLegalizerCombiner
: GICombiner<"RISCVPostLegalizerCombinerImpl",
[sub_to_add, combines_for_extload, redundant_and,
identity_combines, shift_immed_chain,
- commute_constant_to_rhs, simplify_neg_minmax]> {
+ commute_constant_to_rhs, simplify_neg_minmax,
+ fold_fp_zero_store]> {
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
new file mode 100644
index 0000000000000..bc79c6f650291
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/store-fp-zero-to-x0.ll
@@ -0,0 +1,320 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+f,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV32F
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+d,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV32D
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+f,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV64F
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+d,+zfh < %s \
+; RUN: | FileCheck %s --check-prefix=RV64D
+
+define void @zero_f16(ptr %i) {
+; RV32F-LABEL: zero_f16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store half 0.0, ptr %i, align 4
+ ret void
+}
+
+define void @zero_bf16(ptr %i) {
+; RV32F-LABEL: zero_bf16:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sh zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_bf16:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sh zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_bf16:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sh zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_bf16:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sh zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store bfloat 0.0, ptr %i, align 4
+ ret void
+}
+
+define void @zero_f32(ptr %i) {
+; RV32F-LABEL: zero_f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store float 0.0, ptr %i, align 4
+ ret void
+}
+
+
+define void @zero_f64(ptr %i) {
+; RV32F-LABEL: zero_f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI3_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI3_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store double 0.0, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v1f32(ptr %i) {
+; RV32F-LABEL: zero_v1f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store <1 x float> <float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v2f32(ptr %i) {
+; RV32F-LABEL: zero_v2f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: ret
+entry:
+ store <2 x float> <float 0.0, float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v4f32(ptr %i) {
+; RV32F-LABEL: zero_v4f32:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: sw zero, 0(a0)
+; RV32F-NEXT: sw zero, 4(a0)
+; RV32F-NEXT: sw zero, 8(a0)
+; RV32F-NEXT: sw zero, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f32:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: sw zero, 0(a0)
+; RV32D-NEXT: sw zero, 4(a0)
+; RV32D-NEXT: sw zero, 8(a0)
+; RV32D-NEXT: sw zero, 12(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f32:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sw zero, 0(a0)
+; RV64F-NEXT: sw zero, 4(a0)
+; RV64F-NEXT: sw zero, 8(a0)
+; RV64F-NEXT: sw zero, 12(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f32:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sw zero, 0(a0)
+; RV64D-NEXT: sw zero, 4(a0)
+; RV64D-NEXT: sw zero, 8(a0)
+; RV64D-NEXT: sw zero, 12(a0)
+; RV64D-NEXT: ret
+entry:
+ store <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v1f64(ptr %i) {
+; RV32F-LABEL: zero_v1f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI7_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v1f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v1f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v1f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: ret
+entry:
+ store <1 x double> <double 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v2f64(ptr %i) {
+; RV32F-LABEL: zero_v2f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI8_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v2f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v2f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v2f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: ret
+entry:
+ store <2 x double> <double 0.0, double 0.0>, ptr %i, align 8
+ ret void
+}
+
+define void @zero_v4f64(ptr %i) {
+; RV32F-LABEL: zero_v4f64:
+; RV32F: # %bb.0: # %entry
+; RV32F-NEXT: lui a1, %hi(.LCPI9_0)
+; RV32F-NEXT: addi a1, a1, %lo(.LCPI9_0)
+; RV32F-NEXT: lw a2, 0(a1)
+; RV32F-NEXT: lw a1, 4(a1)
+; RV32F-NEXT: sw a2, 0(a0)
+; RV32F-NEXT: sw a1, 4(a0)
+; RV32F-NEXT: sw a2, 8(a0)
+; RV32F-NEXT: sw a1, 12(a0)
+; RV32F-NEXT: sw a2, 16(a0)
+; RV32F-NEXT: sw a1, 20(a0)
+; RV32F-NEXT: sw a2, 24(a0)
+; RV32F-NEXT: sw a1, 28(a0)
+; RV32F-NEXT: ret
+;
+; RV32D-LABEL: zero_v4f64:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fcvt.d.w fa5, zero
+; RV32D-NEXT: fsd fa5, 0(a0)
+; RV32D-NEXT: fsd fa5, 8(a0)
+; RV32D-NEXT: fsd fa5, 16(a0)
+; RV32D-NEXT: fsd fa5, 24(a0)
+; RV32D-NEXT: ret
+;
+; RV64F-LABEL: zero_v4f64:
+; RV64F: # %bb.0: # %entry
+; RV64F-NEXT: sd zero, 0(a0)
+; RV64F-NEXT: sd zero, 8(a0)
+; RV64F-NEXT: sd zero, 16(a0)
+; RV64F-NEXT: sd zero, 24(a0)
+; RV64F-NEXT: ret
+;
+; RV64D-LABEL: zero_v4f64:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: sd zero, 0(a0)
+; RV64D-NEXT: sd zero, 8(a0)
+; RV64D-NEXT: sd zero, 16(a0)
+; RV64D-NEXT: sd zero, 24(a0)
+; RV64D-NEXT: ret
+entry:
+ store <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, ptr %i, align 8
+ ret void
+}