[llvm] [RISCV] Set riscv-fpimm-cost threshold to 3 by default (PR #159352)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 17 05:53:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Alex Bradbury (asb)
<details>
<summary>Changes</summary>
`-riscv-fpimm-cost` controls the threshold at which the constant pool is used for float constants rather than generating directly (typically into a GPR followed by an `fmv`). The value used for this knob indicates the number of instructions that can be used to produce the value (otherwise we fall back to the constant pool). Upping it to 3 covers a huge number of additional constants (see
<https://github.com/llvm/llvm-project/issues/153402>), e.g. most whole numbers which can be generated through lui+shift+fmv. As in general we struggle with efficient code generation for constant pool accesses, reducing the number of constant pool accesses is beneficial. We are typically replacing a two-instruction sequence (which includes a load) with a three-instruction sequence (two simple arithmetic operations plus an fmv), which is typically a worthwhile trade-off.
The CHECK prefixes for various tests had to be updated to avoid conflicts leading to check lines being dropped altogether (see <https://github.com/llvm/llvm-project/pull/159321> for a change to update_llc_test_checks to aid diagnosing this).
---
Patch is 1.34 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159352.diff
72 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+1-1)
- (modified) llvm/test/CodeGen/RISCV/bfloat-convert.ll (+130-89)
- (modified) llvm/test/CodeGen/RISCV/bfloat-imm.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/calling-conv-half.ll (+152-132)
- (modified) llvm/test/CodeGen/RISCV/codemodel-lowering.ll (+42-64)
- (modified) llvm/test/CodeGen/RISCV/double-convert.ll (+34-28)
- (modified) llvm/test/CodeGen/RISCV/double-imm.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/double-intrinsics.ll (+18-12)
- (modified) llvm/test/CodeGen/RISCV/double-round-conv.ll (+15-10)
- (modified) llvm/test/CodeGen/RISCV/double-zfa.ll (+36-15)
- (modified) llvm/test/CodeGen/RISCV/float-convert.ll (+34-28)
- (modified) llvm/test/CodeGen/RISCV/float-imm.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/float-round-conv-sat.ll (+36-24)
- (modified) llvm/test/CodeGen/RISCV/half-arith.ll (+14-33)
- (modified) llvm/test/CodeGen/RISCV/half-convert.ll (+411-329)
- (modified) llvm/test/CodeGen/RISCV/half-imm.ll (+9-6)
- (modified) llvm/test/CodeGen/RISCV/half-intrinsics.ll (+18-12)
- (modified) llvm/test/CodeGen/RISCV/half-round-conv-sat.ll (+108-72)
- (modified) llvm/test/CodeGen/RISCV/half-round-conv.ll (+45-30)
- (modified) llvm/test/CodeGen/RISCV/half-select-fcmp.ll (+17-15)
- (modified) llvm/test/CodeGen/RISCV/half-zfa-fli.ll (+39-26)
- (modified) llvm/test/CodeGen/RISCV/half-zfa.ll (+9-6)
- (modified) llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/rv64-double-convert.ll (+12-9)
- (modified) llvm/test/CodeGen/RISCV/rv64-float-convert.ll (+7-5)
- (modified) llvm/test/CodeGen/RISCV/rv64-half-convert.ll (+20-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll (+533-263)
- (modified) llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll (+72-48)
- (modified) llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll (+1040-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll (+1040-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll (+125-63)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+432-116)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll (+148-74)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll (+526-256)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+674-326)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll (+470-228)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll (+1040-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll (+1040-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll (+1040-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll (+121-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/floor-vp.ll (+1036-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll (+234-64)
- (modified) llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll (+266-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll (+148-74)
- (modified) llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll (+234-64)
- (modified) llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll (+36-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll (+1036-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/rint-vp.ll (+924-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/round-vp.ll (+1036-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll (+1036-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll (+1036-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll (+52-25)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll (+6-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll (+6-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/srodata.ll (-15)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 10b3f0b213811..9de57a2879d5b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -79,7 +79,7 @@ static cl::opt<int>
FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
cl::desc("Give the maximum number of instructions that we will "
"use for creating a floating-point immediate value"),
- cl::init(2));
+ cl::init(3));
static cl::opt<bool>
ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index 6207a17734d62..73ff888e44b3b 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -51,13 +51,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat:
; CHECK32ZFBFMIN: # %bb.0: # %start
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5
-; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0)
; CHECK32ZFBFMIN-NEXT: lui a0, 815104
-; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0
-; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
-; CHECK32ZFBFMIN-NEXT: neg a0, a1
+; CHECK32ZFBFMIN-NEXT: lui a1, 290816
+; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5
+; CHECK32ZFBFMIN-NEXT: addi a1, a1, -512
+; CHECK32ZFBFMIN-NEXT: neg a0, a0
+; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a1
; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz
; CHECK32ZFBFMIN-NEXT: and a0, a0, a1
@@ -68,12 +69,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 815104
; RV32ID-NEXT: fmv.w.x fa5, a1
-; RV32ID-NEXT: lui a1, %hi(.LCPI1_0)
+; RV32ID-NEXT: lui a1, 290816
; RV32ID-NEXT: slli a0, a0, 16
-; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
-; RV32ID-NEXT: fmv.w.x fa3, a0
-; RV32ID-NEXT: feq.s a0, fa3, fa3
-; RV32ID-NEXT: fmax.s fa5, fa3, fa5
+; RV32ID-NEXT: addi a1, a1, -512
+; RV32ID-NEXT: fmv.w.x fa4, a0
+; RV32ID-NEXT: feq.s a0, fa4, fa4
+; RV32ID-NEXT: fmax.s fa5, fa4, fa5
+; RV32ID-NEXT: fmv.w.x fa4, a1
; RV32ID-NEXT: neg a0, a0
; RV32ID-NEXT: fmin.s fa5, fa5, fa4
; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz
@@ -83,13 +85,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat:
; CHECK64ZFBFMIN: # %bb.0: # %start
; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5
-; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0)
; CHECK64ZFBFMIN-NEXT: lui a0, 815104
-; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0
-; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
-; CHECK64ZFBFMIN-NEXT: neg a0, a1
+; CHECK64ZFBFMIN-NEXT: lui a1, 290816
+; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5
+; CHECK64ZFBFMIN-NEXT: addi a1, a1, -512
+; CHECK64ZFBFMIN-NEXT: neg a0, a0
+; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a1
; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz
; CHECK64ZFBFMIN-NEXT: and a0, a0, a1
@@ -100,12 +103,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 815104
; RV64ID-NEXT: fmv.w.x fa5, a1
-; RV64ID-NEXT: lui a1, %hi(.LCPI1_0)
+; RV64ID-NEXT: lui a1, 290816
; RV64ID-NEXT: slli a0, a0, 16
-; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
-; RV64ID-NEXT: fmv.w.x fa3, a0
-; RV64ID-NEXT: feq.s a0, fa3, fa3
-; RV64ID-NEXT: fmax.s fa5, fa3, fa5
+; RV64ID-NEXT: addi a1, a1, -512
+; RV64ID-NEXT: fmv.w.x fa4, a0
+; RV64ID-NEXT: feq.s a0, fa4, fa4
+; RV64ID-NEXT: fmax.s fa5, fa4, fa5
+; RV64ID-NEXT: fmv.w.x fa4, a1
; RV64ID-NEXT: neg a0, a0
; RV64ID-NEXT: fmin.s fa5, fa5, fa4
; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz
@@ -152,49 +156,53 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
; CHECK32ZFBFMIN: # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, zero
-; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3
-; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5
+; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero
+; CHECK32ZFBFMIN-NEXT: lui a0, 292864
+; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT: addi a0, a0, -256
+; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; CHECK32ZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_ui_bf16_sat:
; RV32ID: # %bb.0: # %start
-; RV32ID-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
+; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fa4, a0
-; RV32ID-NEXT: fmv.w.x fa3, zero
-; RV32ID-NEXT: fmax.s fa4, fa4, fa3
-; RV32ID-NEXT: fmin.s fa5, fa4, fa5
+; RV32ID-NEXT: lui a0, 292864
+; RV32ID-NEXT: addi a0, a0, -256
+; RV32ID-NEXT: fmax.s fa5, fa4, fa5
+; RV32ID-NEXT: fmv.w.x fa4, a0
+; RV32ID-NEXT: fmin.s fa5, fa5, fa4
; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
; CHECK64ZFBFMIN: # %bb.0: # %start
-; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK64ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, zero
-; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3
-; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5
+; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, zero
+; CHECK64ZFBFMIN-NEXT: lui a0, 292864
+; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT: addi a0, a0, -256
+; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz
; CHECK64ZFBFMIN-NEXT: ret
;
; RV64ID-LABEL: fcvt_ui_bf16_sat:
; RV64ID: # %bb.0: # %start
-; RV64ID-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa5, zero
; RV64ID-NEXT: slli a0, a0, 16
; RV64ID-NEXT: fmv.w.x fa4, a0
-; RV64ID-NEXT: fmv.w.x fa3, zero
-; RV64ID-NEXT: fmax.s fa4, fa4, fa3
-; RV64ID-NEXT: fmin.s fa5, fa4, fa5
+; RV64ID-NEXT: lui a0, 292864
+; RV64ID-NEXT: addi a0, a0, -256
+; RV64ID-NEXT: fmax.s fa5, fa4, fa5
+; RV64ID-NEXT: fmv.w.x fa4, a0
+; RV64ID-NEXT: fmin.s fa5, fa5, fa4
; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz
; RV64ID-NEXT: ret
start:
@@ -472,20 +480,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IZFBFMIN-NEXT: lui a1, 389120
+; RV32IZFBFMIN-NEXT: addi a1, a1, -1
+; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a1
; RV32IZFBFMIN-NEXT: flt.s a1, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a1, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a2, a3, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: feq.s a3, fs0, fs0
-; RV32IZFBFMIN-NEXT: neg a4, a1
-; RV32IZFBFMIN-NEXT: neg a1, s0
+; RV32IZFBFMIN-NEXT: neg a4, s0
+; RV32IZFBFMIN-NEXT: neg a5, a1
; RV32IZFBFMIN-NEXT: neg a3, a3
-; RV32IZFBFMIN-NEXT: and a0, a1, a0
+; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: and a1, a3, a2
-; RV32IZFBFMIN-NEXT: or a0, a4, a0
+; RV32IZFBFMIN-NEXT: or a0, a5, a0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -511,20 +520,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
+; R32IDZFBFMIN-NEXT: lui a1, 389120
+; R32IDZFBFMIN-NEXT: addi a1, a1, -1
+; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a1
; R32IDZFBFMIN-NEXT: flt.s a1, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a1, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a2, a3, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: feq.s a3, fs0, fs0
-; R32IDZFBFMIN-NEXT: neg a4, a1
-; R32IDZFBFMIN-NEXT: neg a1, s0
+; R32IDZFBFMIN-NEXT: neg a4, s0
+; R32IDZFBFMIN-NEXT: neg a5, a1
; R32IDZFBFMIN-NEXT: neg a3, a3
-; R32IDZFBFMIN-NEXT: and a0, a1, a0
+; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: and a1, a3, a2
-; R32IDZFBFMIN-NEXT: or a0, a4, a0
+; R32IDZFBFMIN-NEXT: or a0, a5, a0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -552,8 +562,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: # %bb.1: # %start
; RV32ID-NEXT: mv a2, a1
; RV32ID-NEXT: .LBB10_2: # %start
-; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
+; RV32ID-NEXT: lui a1, 389120
+; RV32ID-NEXT: addi a1, a1, -1
+; RV32ID-NEXT: fmv.w.x fa5, a1
; RV32ID-NEXT: flt.s a1, fa5, fs0
; RV32ID-NEXT: beqz a1, .LBB10_4
; RV32ID-NEXT: # %bb.3:
@@ -641,30 +652,59 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind {
}
define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
-; CHECK32ZFBFMIN-LABEL: fcvt_lu_bf16_sat:
-; CHECK32ZFBFMIN: # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT: addi sp, sp, -16
-; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0)
-; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0
-; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero
-; CHECK32ZFBFMIN-NEXT: fle.s a0, fa4, fa0
-; CHECK32ZFBFMIN-NEXT: flt.s a1, fa5, fa0
-; CHECK32ZFBFMIN-NEXT: neg s0, a1
-; CHECK32ZFBFMIN-NEXT: neg s1, a0
-; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
-; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
-; CHECK32ZFBFMIN-NEXT: and a1, s1, a1
-; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
-; CHECK32ZFBFMIN-NEXT: or a1, s0, a1
-; CHECK32ZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT: addi sp, sp, 16
-; CHECK32ZFBFMIN-NEXT: ret
+; RV32IZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; RV32IZFBFMIN: # %bb.0: # %start
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFBFMIN-NEXT: fle.s a0, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg s0, a0
+; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
+; RV32IZFBFMIN-NEXT: call __fixunssfdi
+; RV32IZFBFMIN-NEXT: and a0, s0, a0
+; RV32IZFBFMIN-NEXT: lui a2, 391168
+; RV32IZFBFMIN-NEXT: and a1, s0, a1
+; RV32IZFBFMIN-NEXT: addi a2, a2, -1
+; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a2
+; RV32IZFBFMIN-NEXT: flt.s a2, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg a2, a2
+; RV32IZFBFMIN-NEXT: or a0, a2, a0
+; RV32IZFBFMIN-NEXT: or a1, a2, a1
+; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; R32IDZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; R32IDZFBFMIN: # %bb.0: # %start
+; R32IDZFBFMIN-NEXT: addi sp, sp, -16
+; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT: fmv.w.x fa5, zero
+; R32IDZFBFMIN-NEXT: fle.s a0, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg s0, a0
+; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
+; R32IDZFBFMIN-NEXT: call __fixunssfdi
+; R32IDZFBFMIN-NEXT: and a0, s0, a0
+; R32IDZFBFMIN-NEXT: lui a2, 391168
+; R32IDZFBFMIN-NEXT: and a1, s0, a1
+; R32IDZFBFMIN-NEXT: addi a2, a2, -1
+; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a2
+; R32IDZFBFMIN-NEXT: flt.s a2, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg a2, a2
+; R32IDZFBFMIN-NEXT: or a0, a2, a0
+; R32IDZFBFMIN-NEXT: or a1, a2, a1
+; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
+; R32IDZFBFMIN-NEXT: addi sp, sp, 16
+; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_lu_bf16_sat:
; RV32ID: # %bb.0: # %start
@@ -673,15 +713,16 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fmv.x.w a0, fa0
-; RV32ID-NEXT: lui a1, %hi(.LCPI12_0)
-; RV32ID-NEXT: fmv.w.x fa5, zero
-; RV32ID-NEXT: flw fa4, %lo(.LCPI12_0)(a1)
+; RV32ID-NEXT: lui a1, 391168
; RV32ID-NEXT: slli a0, a0, 16
+; RV32ID-NEXT: addi a1, a1, -1
; RV32ID-NEXT: fmv.w.x fa0, a0
-; RV32ID-NEXT: fle.s a0, fa5, fa0
-; RV32ID-NEXT: flt.s a1, fa4, fa0
-; RV32ID-NEXT: neg s0, a1
-; RV32ID-NEXT: neg s1, a0
+; RV32ID-NEXT: fmv.w.x fa5, a1
+; RV32ID-NEXT: flt.s a0, fa5, fa0
+; RV32ID-NEXT: fmv.w.x fa5, zero
+; RV32ID-NEXT: fle.s a1, fa5, fa0
+; RV32ID-NEXT: neg s0, a0
+; RV32ID-NEXT: neg s1, a1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: and a1, s1, a1
diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
index 76ff720b1c268..61014891414d8 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
@@ -7,8 +7,9 @@
define bfloat @bfloat_imm() nounwind {
; CHECK-LABEL: bfloat_imm:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: addi a0, a0, 64
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
ret bfloat 3.0
}
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index d7957540d1b29..d8e6b7f3ede9a 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -519,15 +519,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV32-ILP32F: # %bb.0:
; RV32-ILP32F-NEXT: addi sp, sp, -16
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32F-NEXT: lui a4, %hi(.LCPI3_0)
+; RV32-ILP32F-NEXT: lui a7, 1048565
; RV32-ILP32F-NEXT: li a0, 1
; RV32-ILP32F-NEXT: li a1, 2
; RV32-ILP32F-NEXT: li a2, 3
; RV32-ILP32F-NEXT: li a3, 4
-; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI3_0)(a4)
; RV32-ILP32F-NEXT: li a4, 5
; RV32-ILP32F-NEXT: li a5, 6
; RV32-ILP32F-NEXT: li a6, 7
+; RV32-ILP32F-NEXT: addi a7, a7, -1792
+; RV32-ILP32F-NEXT: fmv.w.x fa0, a7
; RV32-ILP32F-NEXT: li a7, 8
; RV32-ILP32F-NEXT: call callee_half_on_stack
; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -538,15 +539,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV64-LP64F: # %bb.0:
; RV64-LP64F-NEXT: addi sp, sp, -16
; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64F-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64-LP64F-NEXT: lui a7, 1048565
; RV64-LP64F-NEXT: li a0, 1
; RV64-LP64F-NEXT: li a1, 2
; RV64-LP64F-NEXT: li a2, 3
; RV64-LP64F-NEXT: li a3, 4
-; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI3_0)(a4)
; RV64-LP64F-NEXT: li a4, 5
; RV64-LP64F-NEXT: li a5, 6
; RV64-LP64F-NEXT: li a6, 7
+; RV64-LP64F-NEXT: addi a7, a7, -1792
+; RV64-LP64F-NEXT: fmv.w.x fa0, a7
; RV64-LP64F-NEXT: li a7, 8
; RV64-LP64F-NEXT: call callee_half_on_stack
; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -557,15 +559,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV32-ILP32ZFHMIN: # %bb.0:
; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16
; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32ZFHMIN-NEXT: lui a4, %hi(.LCPI3_0)
+; RV32-ILP32ZFHMIN-NEXT: lui a7, 5
; RV32-ILP32ZFHMIN-NEXT: li a0, 1
; RV32-ILP32ZFHMIN-NEXT: li a1, 2
; RV32-ILP32ZFHMIN-NEXT: li a2, 3
; RV32-ILP32ZFHMIN-NEXT: li a3, 4
-; RV32-ILP32ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a4)
; RV32-ILP32ZFHMIN-NEXT: li a4, 5
; RV32-ILP32ZFHMIN-NEXT: li a5, 6
; RV32-ILP32ZFHMIN-NEXT: li a6, 7
+; RV32-ILP32ZFHMIN-NEXT: addi a7, a7, -1792
+; RV32-ILP32ZFHMIN-NEXT: fmv.h.x fa0, a7
; RV32-ILP32ZFHMIN-NEXT: li a7, 8
; RV32-ILP32ZFHMIN-NEXT: call callee_half_on_stack
; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -576,15 +579,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV64-LP64ZFHMIN: # %bb.0:
; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16
; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64ZFHMIN-NEXT: lui a4, %hi(.LCPI3_0)
+; RV64-LP64ZFHMIN-NEXT: lui a7, 5
; RV64-LP64ZFHMIN-NEXT: li a0, 1
; RV64-LP64ZFHMIN-NEXT: li a1, 2
; RV64-LP64ZFHMIN-NEXT: li a2, 3
; RV64-LP64ZFHMIN-NEXT: li a3, 4
-; RV64-LP64ZFHMIN-NEXT: flh fa0, %lo(.LCPI3_0)(a4)
; RV64-LP64ZFHMIN-NEXT: li a4, 5
; RV64-LP64ZFHMIN-NEXT: li a5, 6
; RV64-LP64ZFHMIN-NEXT: li a6, 7
+; RV64-LP64ZFHMIN-NEXT: addi a7, a7, -1792
+; RV64-LP64ZFHMIN-NEXT: fmv.h.x fa0, a7
; RV64-LP64ZFHMIN-NEXT: li a7, 8
; RV64-LP64ZFHMIN-NEXT: call callee_half_on_stack
; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -595,15 +599,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV32-ZFH-ILP32: # %bb.0:
; RV32-ZFH-ILP32-NEXT: addi sp, sp, -16
; RV32-ZFH-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ZFH-ILP32-NEXT: lui a4, %hi(.LCPI3_0)
+; RV32-ZFH-ILP32-NEXT: lui a7, 5
; RV32-ZFH-ILP32-NEXT: li a0, 1
; RV32-ZFH-ILP32-NEXT: li a1, 2
; RV32-ZFH-ILP32-NEXT: li a2, 3
; RV32-ZFH-ILP32-NEXT: li a3, 4
-; RV32-ZFH-ILP32-NEXT: flh fa5, %lo(.LCPI3_0)(a4)
; RV32-ZFH-ILP32-NEXT: li a4, 5
; RV32-ZFH-ILP32-NEXT: li a5, 6
; RV32-ZFH-ILP32-NEXT: li a6, 7
+; RV32-ZFH-ILP32-NEXT: addi a7, a7, -1792
+; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a7
; RV32-ZFH-ILP32-NEXT: li a7, 8
; RV32-ZFH-ILP32-NEXT: fsh fa5, 0(sp)
; RV32-ZFH-ILP32-NEXT: call callee_half_on_stack
@@ -615,15 +620,16 @@ define i32 @caller_half_on_stack() nounwind {
; RV32-ZFH-ILP32F: # %bb.0:
; RV32-ZFH-ILP32F-NEXT: addi sp, sp, -16
; RV32-ZFH-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ZFH-ILP32F-NEXT: lui a4, %hi(.LCPI3_0)
+; RV32-ZFH-ILP32F-NEXT: lui a7, 5
; RV32-ZFH-ILP32F-NEXT: li a0, 1
; RV32-ZFH-ILP32F-NEXT: li a1, 2
; RV32-ZFH-ILP32F-NEXT: li a2, 3
; RV32-ZFH-ILP32F-NEXT: li a3, 4
-; RV32-ZFH-ILP32F-NEXT: flh fa0, %lo(.LCPI3_0)(a4)
; RV32-ZFH-ILP32F-NEXT: li a4, 5
; RV32-ZFH-ILP32F-NEXT: li a5, 6
; RV32-ZFH-ILP32F-NEXT: li a6, 7
+; RV32-ZFH-ILP32F-NEXT: addi a7, a7, -1792
+; RV32-ZFH-ILP32F-NEXT: fmv.h.x fa0, a7
; RV32-ZFH-ILP32F-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/159352
More information about the llvm-commits
mailing list