[llvm] [RISCV] Set riscv-fpimm-cost threshold to 3 by default (PR #159352)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 17 05:53:30 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Alex Bradbury (asb)

<details>
<summary>Changes</summary>

`-riscv-fp-imm-cost` controls the threshold at which the constant pool is used for float constants rather than generating directly (typically into a GPR followed by an `fmv`). The value used for this knob indicates the number of instructions that can be used to produce the value (otherwise we fall back to the constant pool). Upping it to 3 covers a huge number of additional constants (see
<https://github.com/llvm/llvm-project/issues/153402>), e.g. most whole numbers which can be generated through lui+shift+fmv. As in general we struggle with efficient code generation for constant pool accesses, reducing the number of constant pool accesses is beneficial. We are typically replacing a two-instruction sequence (which includes a load) with a three-instruction sequence (two simple arithmetic operations plus a fmv), which is a worthwhile trade-off.

The CHECK prefixes for various tests had to be updated to avoid conflicts leading to check lines being dropped altogether (see <https://github.com/llvm/llvm-project/pull/159321> for a change to update_llc_test_checks to aid diagnosing this).

---

Patch is 1.34 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159352.diff


72 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/bfloat-convert.ll (+130-89) 
- (modified) llvm/test/CodeGen/RISCV/bfloat-imm.ll (+3-2) 
- (modified) llvm/test/CodeGen/RISCV/calling-conv-half.ll (+152-132) 
- (modified) llvm/test/CodeGen/RISCV/codemodel-lowering.ll (+42-64) 
- (modified) llvm/test/CodeGen/RISCV/double-convert.ll (+34-28) 
- (modified) llvm/test/CodeGen/RISCV/double-imm.ll (+3-2) 
- (modified) llvm/test/CodeGen/RISCV/double-intrinsics.ll (+18-12) 
- (modified) llvm/test/CodeGen/RISCV/double-round-conv.ll (+15-10) 
- (modified) llvm/test/CodeGen/RISCV/double-zfa.ll (+36-15) 
- (modified) llvm/test/CodeGen/RISCV/float-convert.ll (+34-28) 
- (modified) llvm/test/CodeGen/RISCV/float-imm.ll (+3-2) 
- (modified) llvm/test/CodeGen/RISCV/float-round-conv-sat.ll (+36-24) 
- (modified) llvm/test/CodeGen/RISCV/half-arith.ll (+14-33) 
- (modified) llvm/test/CodeGen/RISCV/half-convert.ll (+411-329) 
- (modified) llvm/test/CodeGen/RISCV/half-imm.ll (+9-6) 
- (modified) llvm/test/CodeGen/RISCV/half-intrinsics.ll (+18-12) 
- (modified) llvm/test/CodeGen/RISCV/half-round-conv-sat.ll (+108-72) 
- (modified) llvm/test/CodeGen/RISCV/half-round-conv.ll (+45-30) 
- (modified) llvm/test/CodeGen/RISCV/half-select-fcmp.ll (+17-15) 
- (modified) llvm/test/CodeGen/RISCV/half-zfa-fli.ll (+39-26) 
- (modified) llvm/test/CodeGen/RISCV/half-zfa.ll (+9-6) 
- (modified) llvm/test/CodeGen/RISCV/repeated-fp-divisors.ll (+3-2) 
- (modified) llvm/test/CodeGen/RISCV/rv64-double-convert.ll (+12-9) 
- (modified) llvm/test/CodeGen/RISCV/rv64-float-convert.ll (+7-5) 
- (modified) llvm/test/CodeGen/RISCV/rv64-half-convert.ll (+20-17) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll (+533-263) 
- (modified) llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll (+72-48) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll (+1040-258) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll (+1040-258) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll (+125-63) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+432-116) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll (+148-74) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll (+526-256) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+674-326) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll (+470-228) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll (+1040-258) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll (+1040-258) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll (+1040-258) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll (+121-60) 
- (modified) llvm/test/CodeGen/RISCV/rvv/floor-vp.ll (+1036-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll (+234-64) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll (+164-82) 
- (modified) llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll (+266-72) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll (+148-74) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll (+234-64) 
- (modified) llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll (+36-24) 
- (modified) llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll (+1036-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/rint-vp.ll (+924-234) 
- (modified) llvm/test/CodeGen/RISCV/rvv/round-vp.ll (+1036-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll (+1036-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll (+1036-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll (+52-25) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll (+6-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll (+6-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/srodata.ll (-15) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 10b3f0b213811..9de57a2879d5b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -79,7 +79,7 @@ static cl::opt<int>
     FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
               cl::desc("Give the maximum number of instructions that we will "
                        "use for creating a floating-point immediate value"),
-              cl::init(2));
+              cl::init(3));
 
 static cl::opt<bool>
     ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index 6207a17734d62..73ff888e44b3b 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -51,13 +51,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat:
 ; CHECK32ZFBFMIN:       # %bb.0: # %start
 ; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK32ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK32ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
 ; CHECK32ZFBFMIN-NEXT:    lui a0, 815104
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
-; CHECK32ZFBFMIN-NEXT:    neg a0, a1
+; CHECK32ZFBFMIN-NEXT:    lui a1, 290816
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -512
+; CHECK32ZFBFMIN-NEXT:    neg a0, a0
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a1, fa5, rtz
 ; CHECK32ZFBFMIN-NEXT:    and a0, a0, a1
@@ -68,12 +69,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
 ; RV32ID-NEXT:    lui a1, 815104
 ; RV32ID-NEXT:    fmv.w.x fa5, a1
-; RV32ID-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32ID-NEXT:    lui a1, 290816
 ; RV32ID-NEXT:    slli a0, a0, 16
-; RV32ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV32ID-NEXT:    fmv.w.x fa3, a0
-; RV32ID-NEXT:    feq.s a0, fa3, fa3
-; RV32ID-NEXT:    fmax.s fa5, fa3, fa5
+; RV32ID-NEXT:    addi a1, a1, -512
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    feq.s a0, fa4, fa4
+; RV32ID-NEXT:    fmax.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmv.w.x fa4, a1
 ; RV32ID-NEXT:    neg a0, a0
 ; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
@@ -83,13 +85,14 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat:
 ; CHECK64ZFBFMIN:       # %bb.0: # %start
 ; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64ZFBFMIN-NEXT:    lui a0, %hi(.LCPI1_0)
-; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
-; CHECK64ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a0)
 ; CHECK64ZFBFMIN-NEXT:    lui a0, 815104
-; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, a0
-; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
-; CHECK64ZFBFMIN-NEXT:    neg a0, a1
+; CHECK64ZFBFMIN-NEXT:    lui a1, 290816
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -512
+; CHECK64ZFBFMIN-NEXT:    neg a0, a0
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a1
 ; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a1, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
@@ -100,12 +103,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
 ; RV64ID-NEXT:    lui a1, 815104
 ; RV64ID-NEXT:    fmv.w.x fa5, a1
-; RV64ID-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV64ID-NEXT:    lui a1, 290816
 ; RV64ID-NEXT:    slli a0, a0, 16
-; RV64ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; RV64ID-NEXT:    fmv.w.x fa3, a0
-; RV64ID-NEXT:    feq.s a0, fa3, fa3
-; RV64ID-NEXT:    fmax.s fa5, fa3, fa5
+; RV64ID-NEXT:    addi a1, a1, -512
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    feq.s a0, fa4, fa4
+; RV64ID-NEXT:    fmax.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmv.w.x fa4, a1
 ; RV64ID-NEXT:    neg a0, a0
 ; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
@@ -152,49 +156,53 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
 define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
 ; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
 ; CHECK32ZFBFMIN:       # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK32ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK32ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK32ZFBFMIN-NEXT:    lui a0, 292864
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    addi a0, a0, -256
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK32ZFBFMIN-NEXT:    ret
 ;
 ; RV32ID-LABEL: fcvt_ui_bf16_sat:
 ; RV32ID:       # %bb.0: # %start
-; RV32ID-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    fmv.w.x fa5, zero
 ; RV32ID-NEXT:    slli a0, a0, 16
 ; RV32ID-NEXT:    fmv.w.x fa4, a0
-; RV32ID-NEXT:    fmv.w.x fa3, zero
-; RV32ID-NEXT:    fmax.s fa4, fa4, fa3
-; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    lui a0, 292864
+; RV32ID-NEXT:    addi a0, a0, -256
+; RV32ID-NEXT:    fmax.s fa5, fa4, fa5
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV32ID-NEXT:    ret
 ;
 ; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
 ; CHECK64ZFBFMIN:       # %bb.0: # %start
-; CHECK64ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK64ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, zero
-; CHECK64ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK64ZFBFMIN-NEXT:    lui a0, 292864
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    addi a0, a0, -256
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
 ; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    ret
 ;
 ; RV64ID-LABEL: fcvt_ui_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
-; RV64ID-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    fmv.w.x fa5, zero
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa4, a0
-; RV64ID-NEXT:    fmv.w.x fa3, zero
-; RV64ID-NEXT:    fmax.s fa4, fa4, fa3
-; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    lui a0, 292864
+; RV64ID-NEXT:    addi a0, a0, -256
+; RV64ID-NEXT:    fmax.s fa5, fa4, fa5
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
 ; RV64ID-NEXT:    ret
 start:
@@ -472,20 +480,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32IZFBFMIN-NEXT:  # %bb.1: # %start
 ; RV32IZFBFMIN-NEXT:    mv a2, a1
 ; RV32IZFBFMIN-NEXT:  .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32IZFBFMIN-NEXT:    lui a1, 389120
+; RV32IZFBFMIN-NEXT:    addi a1, a1, -1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, a1
 ; RV32IZFBFMIN-NEXT:    flt.s a1, fa5, fs0
 ; RV32IZFBFMIN-NEXT:    beqz a1, .LBB10_4
 ; RV32IZFBFMIN-NEXT:  # %bb.3:
 ; RV32IZFBFMIN-NEXT:    addi a2, a3, -1
 ; RV32IZFBFMIN-NEXT:  .LBB10_4: # %start
 ; RV32IZFBFMIN-NEXT:    feq.s a3, fs0, fs0
-; RV32IZFBFMIN-NEXT:    neg a4, a1
-; RV32IZFBFMIN-NEXT:    neg a1, s0
+; RV32IZFBFMIN-NEXT:    neg a4, s0
+; RV32IZFBFMIN-NEXT:    neg a5, a1
 ; RV32IZFBFMIN-NEXT:    neg a3, a3
-; RV32IZFBFMIN-NEXT:    and a0, a1, a0
+; RV32IZFBFMIN-NEXT:    and a0, a4, a0
 ; RV32IZFBFMIN-NEXT:    and a1, a3, a2
-; RV32IZFBFMIN-NEXT:    or a0, a4, a0
+; RV32IZFBFMIN-NEXT:    or a0, a5, a0
 ; RV32IZFBFMIN-NEXT:    and a0, a3, a0
 ; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -511,20 +520,21 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; R32IDZFBFMIN-NEXT:  # %bb.1: # %start
 ; R32IDZFBFMIN-NEXT:    mv a2, a1
 ; R32IDZFBFMIN-NEXT:  .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; R32IDZFBFMIN-NEXT:    lui a1, 389120
+; R32IDZFBFMIN-NEXT:    addi a1, a1, -1
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, a1
 ; R32IDZFBFMIN-NEXT:    flt.s a1, fa5, fs0
 ; R32IDZFBFMIN-NEXT:    beqz a1, .LBB10_4
 ; R32IDZFBFMIN-NEXT:  # %bb.3:
 ; R32IDZFBFMIN-NEXT:    addi a2, a3, -1
 ; R32IDZFBFMIN-NEXT:  .LBB10_4: # %start
 ; R32IDZFBFMIN-NEXT:    feq.s a3, fs0, fs0
-; R32IDZFBFMIN-NEXT:    neg a4, a1
-; R32IDZFBFMIN-NEXT:    neg a1, s0
+; R32IDZFBFMIN-NEXT:    neg a4, s0
+; R32IDZFBFMIN-NEXT:    neg a5, a1
 ; R32IDZFBFMIN-NEXT:    neg a3, a3
-; R32IDZFBFMIN-NEXT:    and a0, a1, a0
+; R32IDZFBFMIN-NEXT:    and a0, a4, a0
 ; R32IDZFBFMIN-NEXT:    and a1, a3, a2
-; R32IDZFBFMIN-NEXT:    or a0, a4, a0
+; R32IDZFBFMIN-NEXT:    or a0, a5, a0
 ; R32IDZFBFMIN-NEXT:    and a0, a3, a0
 ; R32IDZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; R32IDZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -552,8 +562,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:  # %bb.1: # %start
 ; RV32ID-NEXT:    mv a2, a1
 ; RV32ID-NEXT:  .LBB10_2: # %start
-; RV32ID-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
+; RV32ID-NEXT:    lui a1, 389120
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    fmv.w.x fa5, a1
 ; RV32ID-NEXT:    flt.s a1, fa5, fs0
 ; RV32ID-NEXT:    beqz a1, .LBB10_4
 ; RV32ID-NEXT:  # %bb.3:
@@ -641,30 +652,59 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind {
 }
 
 define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
-; CHECK32ZFBFMIN-LABEL: fcvt_lu_bf16_sat:
-; CHECK32ZFBFMIN:       # %bb.0: # %start
-; CHECK32ZFBFMIN-NEXT:    addi sp, sp, -16
-; CHECK32ZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK32ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI12_0)(a0)
-; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa0, fa0
-; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, zero
-; CHECK32ZFBFMIN-NEXT:    fle.s a0, fa4, fa0
-; CHECK32ZFBFMIN-NEXT:    flt.s a1, fa5, fa0
-; CHECK32ZFBFMIN-NEXT:    neg s0, a1
-; CHECK32ZFBFMIN-NEXT:    neg s1, a0
-; CHECK32ZFBFMIN-NEXT:    call __fixunssfdi
-; CHECK32ZFBFMIN-NEXT:    and a0, s1, a0
-; CHECK32ZFBFMIN-NEXT:    and a1, s1, a1
-; CHECK32ZFBFMIN-NEXT:    or a0, s0, a0
-; CHECK32ZFBFMIN-NEXT:    or a1, s0, a1
-; CHECK32ZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; CHECK32ZFBFMIN-NEXT:    addi sp, sp, 16
-; CHECK32ZFBFMIN-NEXT:    ret
+; RV32IZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; RV32IZFBFMIN:       # %bb.0: # %start
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
+; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, zero
+; RV32IZFBFMIN-NEXT:    fle.s a0, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg s0, a0
+; RV32IZFBFMIN-NEXT:    fmv.s fa0, fs0
+; RV32IZFBFMIN-NEXT:    call __fixunssfdi
+; RV32IZFBFMIN-NEXT:    and a0, s0, a0
+; RV32IZFBFMIN-NEXT:    lui a2, 391168
+; RV32IZFBFMIN-NEXT:    and a1, s0, a1
+; RV32IZFBFMIN-NEXT:    addi a2, a2, -1
+; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, a2
+; RV32IZFBFMIN-NEXT:    flt.s a2, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg a2, a2
+; RV32IZFBFMIN-NEXT:    or a0, a2, a0
+; RV32IZFBFMIN-NEXT:    or a1, a2, a1
+; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    ret
+;
+; R32IDZFBFMIN-LABEL: fcvt_lu_bf16_sat:
+; R32IDZFBFMIN:       # %bb.0: # %start
+; R32IDZFBFMIN-NEXT:    addi sp, sp, -16
+; R32IDZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, zero
+; R32IDZFBFMIN-NEXT:    fle.s a0, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg s0, a0
+; R32IDZFBFMIN-NEXT:    fmv.s fa0, fs0
+; R32IDZFBFMIN-NEXT:    call __fixunssfdi
+; R32IDZFBFMIN-NEXT:    and a0, s0, a0
+; R32IDZFBFMIN-NEXT:    lui a2, 391168
+; R32IDZFBFMIN-NEXT:    and a1, s0, a1
+; R32IDZFBFMIN-NEXT:    addi a2, a2, -1
+; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, a2
+; R32IDZFBFMIN-NEXT:    flt.s a2, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg a2, a2
+; R32IDZFBFMIN-NEXT:    or a0, a2, a0
+; R32IDZFBFMIN-NEXT:    or a1, a2, a1
+; R32IDZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    addi sp, sp, 16
+; R32IDZFBFMIN-NEXT:    ret
 ;
 ; RV32ID-LABEL: fcvt_lu_bf16_sat:
 ; RV32ID:       # %bb.0: # %start
@@ -673,15 +713,16 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
-; RV32ID-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV32ID-NEXT:    fmv.w.x fa5, zero
-; RV32ID-NEXT:    flw fa4, %lo(.LCPI12_0)(a1)
+; RV32ID-NEXT:    lui a1, 391168
 ; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    addi a1, a1, -1
 ; RV32ID-NEXT:    fmv.w.x fa0, a0
-; RV32ID-NEXT:    fle.s a0, fa5, fa0
-; RV32ID-NEXT:    flt.s a1, fa4, fa0
-; RV32ID-NEXT:    neg s0, a1
-; RV32ID-NEXT:    neg s1, a0
+; RV32ID-NEXT:    fmv.w.x fa5, a1
+; RV32ID-NEXT:    flt.s a0, fa5, fa0
+; RV32ID-NEXT:    fmv.w.x fa5, zero
+; RV32ID-NEXT:    fle.s a1, fa5, fa0
+; RV32ID-NEXT:    neg s0, a0
+; RV32ID-NEXT:    neg s1, a1
 ; RV32ID-NEXT:    call __fixunssfdi
 ; RV32ID-NEXT:    and a0, s1, a0
 ; RV32ID-NEXT:    and a1, s1, a1
diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
index 76ff720b1c268..61014891414d8 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll
@@ -7,8 +7,9 @@
 define bfloat @bfloat_imm() nounwind {
 ; CHECK-LABEL: bfloat_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT:    flh fa0, %lo(.LCPI0_0)(a0)
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    addi a0, a0, 64
+; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
   ret bfloat 3.0
 }
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index d7957540d1b29..d8e6b7f3ede9a 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -519,15 +519,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ILP32F:       # %bb.0:
 ; RV32-ILP32F-NEXT:    addi sp, sp, -16
 ; RV32-ILP32F-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32F-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV32-ILP32F-NEXT:    lui a7, 1048565
 ; RV32-ILP32F-NEXT:    li a0, 1
 ; RV32-ILP32F-NEXT:    li a1, 2
 ; RV32-ILP32F-NEXT:    li a2, 3
 ; RV32-ILP32F-NEXT:    li a3, 4
-; RV32-ILP32F-NEXT:    flw fa0, %lo(.LCPI3_0)(a4)
 ; RV32-ILP32F-NEXT:    li a4, 5
 ; RV32-ILP32F-NEXT:    li a5, 6
 ; RV32-ILP32F-NEXT:    li a6, 7
+; RV32-ILP32F-NEXT:    addi a7, a7, -1792
+; RV32-ILP32F-NEXT:    fmv.w.x fa0, a7
 ; RV32-ILP32F-NEXT:    li a7, 8
 ; RV32-ILP32F-NEXT:    call callee_half_on_stack
 ; RV32-ILP32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -538,15 +539,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV64-LP64F:       # %bb.0:
 ; RV64-LP64F-NEXT:    addi sp, sp, -16
 ; RV64-LP64F-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64F-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV64-LP64F-NEXT:    lui a7, 1048565
 ; RV64-LP64F-NEXT:    li a0, 1
 ; RV64-LP64F-NEXT:    li a1, 2
 ; RV64-LP64F-NEXT:    li a2, 3
 ; RV64-LP64F-NEXT:    li a3, 4
-; RV64-LP64F-NEXT:    flw fa0, %lo(.LCPI3_0)(a4)
 ; RV64-LP64F-NEXT:    li a4, 5
 ; RV64-LP64F-NEXT:    li a5, 6
 ; RV64-LP64F-NEXT:    li a6, 7
+; RV64-LP64F-NEXT:    addi a7, a7, -1792
+; RV64-LP64F-NEXT:    fmv.w.x fa0, a7
 ; RV64-LP64F-NEXT:    li a7, 8
 ; RV64-LP64F-NEXT:    call callee_half_on_stack
 ; RV64-LP64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -557,15 +559,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ILP32ZFHMIN:       # %bb.0:
 ; RV32-ILP32ZFHMIN-NEXT:    addi sp, sp, -16
 ; RV32-ILP32ZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32ZFHMIN-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV32-ILP32ZFHMIN-NEXT:    lui a7, 5
 ; RV32-ILP32ZFHMIN-NEXT:    li a0, 1
 ; RV32-ILP32ZFHMIN-NEXT:    li a1, 2
 ; RV32-ILP32ZFHMIN-NEXT:    li a2, 3
 ; RV32-ILP32ZFHMIN-NEXT:    li a3, 4
-; RV32-ILP32ZFHMIN-NEXT:    flh fa0, %lo(.LCPI3_0)(a4)
 ; RV32-ILP32ZFHMIN-NEXT:    li a4, 5
 ; RV32-ILP32ZFHMIN-NEXT:    li a5, 6
 ; RV32-ILP32ZFHMIN-NEXT:    li a6, 7
+; RV32-ILP32ZFHMIN-NEXT:    addi a7, a7, -1792
+; RV32-ILP32ZFHMIN-NEXT:    fmv.h.x fa0, a7
 ; RV32-ILP32ZFHMIN-NEXT:    li a7, 8
 ; RV32-ILP32ZFHMIN-NEXT:    call callee_half_on_stack
 ; RV32-ILP32ZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -576,15 +579,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV64-LP64ZFHMIN:       # %bb.0:
 ; RV64-LP64ZFHMIN-NEXT:    addi sp, sp, -16
 ; RV64-LP64ZFHMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64ZFHMIN-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV64-LP64ZFHMIN-NEXT:    lui a7, 5
 ; RV64-LP64ZFHMIN-NEXT:    li a0, 1
 ; RV64-LP64ZFHMIN-NEXT:    li a1, 2
 ; RV64-LP64ZFHMIN-NEXT:    li a2, 3
 ; RV64-LP64ZFHMIN-NEXT:    li a3, 4
-; RV64-LP64ZFHMIN-NEXT:    flh fa0, %lo(.LCPI3_0)(a4)
 ; RV64-LP64ZFHMIN-NEXT:    li a4, 5
 ; RV64-LP64ZFHMIN-NEXT:    li a5, 6
 ; RV64-LP64ZFHMIN-NEXT:    li a6, 7
+; RV64-LP64ZFHMIN-NEXT:    addi a7, a7, -1792
+; RV64-LP64ZFHMIN-NEXT:    fmv.h.x fa0, a7
 ; RV64-LP64ZFHMIN-NEXT:    li a7, 8
 ; RV64-LP64ZFHMIN-NEXT:    call callee_half_on_stack
 ; RV64-LP64ZFHMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -595,15 +599,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ZFH-ILP32:       # %bb.0:
 ; RV32-ZFH-ILP32-NEXT:    addi sp, sp, -16
 ; RV32-ZFH-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ZFH-ILP32-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV32-ZFH-ILP32-NEXT:    lui a7, 5
 ; RV32-ZFH-ILP32-NEXT:    li a0, 1
 ; RV32-ZFH-ILP32-NEXT:    li a1, 2
 ; RV32-ZFH-ILP32-NEXT:    li a2, 3
 ; RV32-ZFH-ILP32-NEXT:    li a3, 4
-; RV32-ZFH-ILP32-NEXT:    flh fa5, %lo(.LCPI3_0)(a4)
 ; RV32-ZFH-ILP32-NEXT:    li a4, 5
 ; RV32-ZFH-ILP32-NEXT:    li a5, 6
 ; RV32-ZFH-ILP32-NEXT:    li a6, 7
+; RV32-ZFH-ILP32-NEXT:    addi a7, a7, -1792
+; RV32-ZFH-ILP32-NEXT:    fmv.h.x fa5, a7
 ; RV32-ZFH-ILP32-NEXT:    li a7, 8
 ; RV32-ZFH-ILP32-NEXT:    fsh fa5, 0(sp)
 ; RV32-ZFH-ILP32-NEXT:    call callee_half_on_stack
@@ -615,15 +620,16 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV32-ZFH-ILP32F:       # %bb.0:
 ; RV32-ZFH-ILP32F-NEXT:    addi sp, sp, -16
 ; RV32-ZFH-ILP32F-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ZFH-ILP32F-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV32-ZFH-ILP32F-NEXT:    lui a7, 5
 ; RV32-ZFH-ILP32F-NEXT:    li a0, 1
 ; RV32-ZFH-ILP32F-NEXT:    li a1, 2
 ; RV32-ZFH-ILP32F-NEXT:    li a2, 3
 ; RV32-ZFH-ILP32F-NEXT:    li a3, 4
-; RV32-ZFH-ILP32F-NEXT:    flh fa0, %lo(.LCPI3_0)(a4)
 ; RV32-ZFH-ILP32F-NEXT:    li a4, 5
 ; RV32-ZFH-ILP32F-NEXT:    li a5, 6
 ; RV32-ZFH-ILP32F-NEXT:    li a6, 7
+; RV32-ZFH-ILP32F-NEXT:    addi a7, a7, -1792
+; RV32-ZFH-ILP32F-NEXT:    fmv.h.x fa0, a7
 ; RV32-ZFH-ILP32F-...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/159352


More information about the llvm-commits mailing list