[llvm] [RISCV] Promote bf16 ops to f32 with zvfbfmin (PR #108937)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 00:36:24 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
For f16 with zvfhmin, we promote most ops and VP ops to f32. This does the same for bf16 with zvfbfmin, so the two FP types are now in sync.
There are a few places in the custom lowering where we need to check for an LMUL 8 f16/bf16 vector that can't be promoted and must instead be split; this patch extracts that check into isPromotedOpNeedingSplit.
In a follow-up NFC we can deduplicate the code that sets up the promotions.
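As a rough illustration (not part of the patch), this is the kind of IR that is affected: with zvfbfmin but no native bf16 arithmetic, a scalable bf16 vector op like the one below should now be legalized by promoting to f32 rather than being scalarized. The function name and the exact -mattr string are hypothetical; the expected lowering is only sketched in the comments, the authoritative output is whatever the updated tests in this PR check.

```llvm
; Hypothetical reduced example, modeled on the RUN lines in the updated tests.
; RUN: llc -mtriple=riscv64 -mattr=+v,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s

define <vscale x 4 x bfloat> @fadd_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
  ; Expected to lower roughly as: widen both operands to f32
  ; (vfwcvtbf16.f.f.v), do the add in f32, then narrow back to bf16.
  %c = fadd <vscale x 4 x bfloat> %a, %b
  ret <vscale x 4 x bfloat> %c
}
```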
Stacked upon #108765
---
Patch is 1.16 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108937.diff
46 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+78-78)
- (modified) llvm/test/Analysis/CostModel/RISCV/fround.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll (+471-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll (+372-88)
- (modified) llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll (+378-88)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+37-906)
- (modified) llvm/test/CodeGen/RISCV/rvv/floor-vp.ll (+471-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll (+187-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+545-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll (+187-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+545-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll (+401-87)
- (modified) llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll (+342-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll (+372-88)
- (modified) llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll (+372-89)
- (modified) llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll (+342-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll (+465-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/rint-vp.ll (+442-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/round-vp.ll (+473-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll (+473-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll (+473-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll (+1718-195)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll (+1154-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll (+235-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll (+248-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll (+664-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll (+44-11)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll (+254-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll (+240-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll (+626-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll (+2087-362)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll (+394-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll (+561-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll (+239-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll (+278-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll (+239-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll (+278-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll (+235-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll (+248-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll (+106-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll (+101-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll (+240-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll (+254-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll (+248-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll (+626-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll (+308-32)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index eb8ea95e2d8583..efc005dff325a8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -937,39 +937,41 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
};
// TODO: support more ops.
- static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN,
- ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,
- ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
- ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+ static const unsigned ZvfhminZvfbfminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
+ ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA};
// TODO: support more vp ops.
- static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD,
- ISD::VP_FSUB,
- ISD::VP_FMUL,
- ISD::VP_FDIV,
- ISD::VP_FMA,
- ISD::VP_REDUCE_FADD,
- ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN,
- ISD::VP_REDUCE_FMAX,
- ISD::VP_SQRT,
- ISD::VP_FMINNUM,
- ISD::VP_FMAXNUM,
- ISD::VP_FCEIL,
- ISD::VP_FFLOOR,
- ISD::VP_FROUND,
- ISD::VP_FROUNDEVEN,
- ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT,
- ISD::VP_FNEARBYINT,
- ISD::VP_SETCC,
- ISD::VP_FMINIMUM,
- ISD::VP_FMAXIMUM,
- ISD::VP_REDUCE_FMINIMUM,
- ISD::VP_REDUCE_FMAXIMUM};
+ static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
+ ISD::VP_FADD,
+ ISD::VP_FSUB,
+ ISD::VP_FMUL,
+ ISD::VP_FDIV,
+ ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD,
+ ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN,
+ ISD::VP_REDUCE_FMAX,
+ ISD::VP_SQRT,
+ ISD::VP_FMINNUM,
+ ISD::VP_FMAXNUM,
+ ISD::VP_FCEIL,
+ ISD::VP_FFLOOR,
+ ISD::VP_FROUND,
+ ISD::VP_FROUNDEVEN,
+ ISD::VP_FROUNDTOZERO,
+ ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT,
+ ISD::VP_SETCC,
+ ISD::VP_FMINIMUM,
+ ISD::VP_FMAXIMUM,
+ ISD::VP_REDUCE_FMINIMUM,
+ ISD::VP_REDUCE_FMAXIMUM};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -1092,20 +1094,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- // Custom split nxv32f16 since nxv32f32 if not legal.
+ // Custom split nxv32f16 since nxv32f32 is not legal.
if (VT == MVT::nxv32f16) {
- setOperationAction(ZvfhminPromoteOps, VT, Custom);
- setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
continue;
}
// Add more promote ops.
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
- setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
}
}
- // TODO: Could we merge some code with zvfhmin?
+ // TODO: merge with zvfhmin
if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
@@ -1134,7 +1136,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- // TODO: Promote to fp32.
+ // Custom split nxv32bf16 since nxv32f32 is not legal.
+ if (VT == MVT::nxv32bf16) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ continue;
+ }
+ // Add more promote ops.
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
}
}
@@ -1370,8 +1381,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: could split the f16 vector into two vectors and do promotion.
if (!isTypeLegal(F32VecVT))
continue;
- setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
- setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
continue;
}
@@ -6328,6 +6339,17 @@ static bool hasMaskOp(unsigned Opcode) {
return false;
}
+static bool isPromotedOpNeedingSplit(SDValue Op,
+ const RISCVSubtarget &Subtarget) {
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return true;
+ if (Op.getValueType() == MVT::nxv32bf16)
+ return true;
+ return false;
+}
+
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
SDLoc DL(Op);
@@ -6665,9 +6687,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND:
@@ -6683,8 +6703,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op.getValueType().getScalarType() == MVT::bf16)) {
- if (Op.getValueType() == MVT::nxv32f16 ||
- Op.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
// int -> f32
SDLoc DL(Op);
@@ -6704,8 +6723,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op1.getValueType().getScalarType() == MVT::bf16)) {
- if (Op1.getValueType() == MVT::nxv32f16 ||
- Op1.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op1, Subtarget))
return SplitVectorOp(Op, DAG);
// [b]f16 -> f32
SDLoc DL(Op);
@@ -6925,6 +6943,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
+ return SplitVectorOp(Op, DAG);
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT:
@@ -6981,9 +7001,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMINIMUM:
case ISD::VP_REDUCE_FMAXIMUM:
- if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
return SplitVectorReductionOp(Op, DAG);
return lowerVPREDUCE(Op, DAG);
case ISD::VP_REDUCE_AND:
@@ -7230,9 +7248,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
}
- if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
@@ -7274,9 +7290,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FMA:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORS:
@@ -7324,9 +7338,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FCOPYSIGN:
if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
return lowerFCOPYSIGN(Op, DAG, Subtarget);
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::STRICT_FADD:
@@ -7335,9 +7347,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
@@ -7394,9 +7404,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FMINNUM:
case ISD::VP_FMAXNUM:
case ISD::VP_FCOPYSIGN:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
[[fallthrough]];
case ISD::VP_SRA:
@@ -7422,8 +7430,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op.getValueType().getScalarType() == MVT::bf16)) {
- if (Op.getValueType() == MVT::nxv32f16 ||
- Op.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
// int -> f32
SDLoc DL(Op);
@@ -7443,8 +7450,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op1.getValueType().getScalarType() == MVT::bf16)) {
- if (Op1.getValueType() == MVT::nxv32f16 ||
- Op1.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op1, Subtarget))
return SplitVectorOp(Op, DAG);
// [b]f16 -> f32
SDLoc DL(Op);
@@ -7457,9 +7463,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
- if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVPOp(Op, DAG);
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
@@ -7494,16 +7498,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::VP_FMAXIMUM:
case ISD::VP_FMINIMUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::EXPERIMENTAL_VP_SPLICE:
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index dc501b82417d3d..b4740f223eca3a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -233,10 +233,10 @@ define void @trunc_fp16() {
;
; ZVFHMIN-LABEL: 'trunc_fp16'
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.trunc.f16(half undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
@@ -1108,10 +1108,10 @@ define void @vp_roundtozero_f16() {
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_roundtozero_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index d613e4ee0bc256..15cff650765efa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -1,22 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/108937
More information about the llvm-commits mailing list