[llvm] f0332d1 - [RISCV] Improve vector fceil/ffloor lowering by changing FRM.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 5 19:04:19 PDT 2022
Author: Craig Topper
Date: 2022-09-05T19:03:44-07:00
New Revision: f0332d12aeecf08420f2595e4b07a71849c931e3
URL: https://github.com/llvm/llvm-project/commit/f0332d12aeecf08420f2595e4b07a71849c931e3
DIFF: https://github.com/llvm/llvm-project/commit/f0332d12aeecf08420f2595e4b07a71849c931e3.diff
LOG: [RISCV] Improve vector fceil/ffloor lowering by changing FRM.
This adds new VFCVT pseudoinstructions that take a rounding mode operand. A custom inserter is used to insert additional instructions that change FRM around the VFCVT.

Some of this is borrowed from D122860, but takes a somewhat different direction. We may migrate to that patch, but for now I was trying to keep this as independent of RVV intrinsics as I could.

A follow-up patch will use this approach for FROUND too.

Still need to fix the cost model.
Reviewed By: arcbbb
Differential Revision: https://reviews.llvm.org/D133238
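For context, the F extension encodes the FRM rounding modes as RNE=0, RTZ=1, RDN=2, RUP=3, RMM=4, with DYN=7 used in an instruction's rm field to select the mode from FRM. Below is a minimal C++ sketch of the per-opcode mode selection this patch performs; it is illustrative only, and the enum and opcode names stand in for RISCVFPRndMode and the ISD opcodes used in the real switch in the diff.

// Minimal sketch (not the in-tree code): pick a static rounding mode per
// node. Encodings follow the RISC-V F extension; RoundingMode stands in
// for llvm::RISCVFPRndMode and Rounder for the ISD opcodes.
enum RoundingMode { RNE = 0, RTZ = 1, RDN = 2, RUP = 3, RMM = 4, DYN = 7 };

enum class Rounder { Ceil, Floor, Trunc };

static RoundingMode staticFRMFor(Rounder R) {
  switch (R) {
  case Rounder::Ceil:  return RUP; // fceil rounds towards +infinity
  case Rounder::Floor: return RDN; // ffloor rounds towards -infinity
  case Rounder::Trunc: return RTZ; // ftrunc keeps vfcvt.rtz.x.f.v, no FRM write
  }
  return DYN;
}

This is why the updated checks below set the mode with "fsrmi a0, 3" (RUP) for ceil and "fsrmi a0, 2" (RDN) for floor: fsrmi writes the immediate to FRM and returns the previous value, which the later "fsrm a0" restores.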
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/RISCV/fround.ll
llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 961dc06e492b7..21d3fc730b526 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1839,8 +1839,6 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
// and back. Taking care to avoid converting values that are nan or already
// correct.
-// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
-// have FRM dependencies modeled yet.
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1887,41 +1885,30 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
// Truncate to integer and convert back to FP.
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
- SDValue Truncated =
- DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
- Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
- Mask, VL);
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Truncated;
- if (Op.getOpcode() == ISD::FCEIL) {
- // If the truncated value is greater than or equal to the original
- // value, we've computed the ceil. Otherwise, we went the wrong way and
- // need to increase by 1.
- // FIXME: This should use a masked operation. Handle here or in isel?
- SDValue SplatVal =
- DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
- SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SplatVal, VL);
- SDValue NeedAdjust = DAG.getNode(
- RISCVISD::SETCC_VL, DL, SetccVT,
- {Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
- Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
- Splat, Truncated, NeedAdjust, VL);
- } else if (Op.getOpcode() == ISD::FFLOOR) {
- // If the truncated value is less than or equal to the original value,
- // we've computed the floor. Otherwise, we went the wrong way and need to
- // decrease by 1.
- // FIXME: This should use a masked operation. Handle here or in isel?
- SDValue SplatVal =
- DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
- SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SplatVal, VL);
- SDValue NeedAdjust = DAG.getNode(
- RISCVISD::SETCC_VL, DL, SetccVT,
- {Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
- Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
- Splat, Truncated, NeedAdjust, VL);
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::FCEIL:
+ Truncated =
+ DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
+ DAG.getTargetConstant(RISCVFPRndMode::RUP, DL, XLenVT), VL);
+ break;
+ case ISD::FFLOOR:
+ Truncated =
+ DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
+ DAG.getTargetConstant(RISCVFPRndMode::RDN, DL, XLenVT), VL);
+ break;
+ case ISD::FTRUNC:
+ Truncated = DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
+ break;
}
+ Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
+ Mask, VL);
+
// Restore the original sign so that -0.0 is preserved.
Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
Src, Src, Mask, VL);
@@ -10664,6 +10651,41 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
+static MachineBasicBlock *
+emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
+ DebugLoc DL = MI.getDebugLoc();
+
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+
+ // Update FRM and save the old value.
+ BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
+ .addImm(MI.getOperand(4).getImm());
+
+ // Emit a VFCVT without the FRM operand.
+ assert(MI.getNumOperands() == 8);
+ auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
+ .add(MI.getOperand(7));
+ if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+ MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ // Restore FRM.
+ BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
+ .addReg(SavedFRM, RegState::Kill);
+
+ // Erase the pseudoinstruction.
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -10695,6 +10717,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
case RISCV::PseudoQuietFLT_D:
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
+ case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
+ case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
+ case RISCV::PseudoVFCVT_RM_X_F_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
+ case RISCV::PseudoVFCVT_RM_X_F_V_M8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
+ case RISCV::PseudoVFCVT_RM_X_F_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
+ case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
}
}
@@ -12242,6 +12276,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(FP_TO_SINT_VL)
NODE_NAME_CASE(FP_TO_UINT_VL)
+ NODE_NAME_CASE(VFCVT_X_F_VL)
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)
NODE_NAME_CASE(FP_EXTEND_VL)
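The net effect of the custom inserter is a save/convert/restore bracket around the conversion. Here is a hedged, self-contained C++ sketch of that shape; swapFRM, writeFRM, and vfcvtXF are hypothetical stand-ins for the SwapFRMImm, WriteFRM, and PseudoVFCVT_X_F_V_*_MASK instructions, and the real expansion operates on MachineInstrs as shown above.

#include <cmath>

// Hypothetical model of the frm CSR and the instructions that touch it.
static unsigned FRM = 0;
static unsigned swapFRM(unsigned Mode) { // models SwapFRMImm (fsrmi)
  unsigned Old = FRM;
  FRM = Mode;
  return Old;
}
static void writeFRM(unsigned Saved) { FRM = Saved; } // models WriteFRM (fsrm)

// Placeholder convert; real hardware rounds according to FRM. Here it is
// hard-wired to mode 3 (RUP), i.e. the ceil case.
static long vfcvtXF(double Src) { return (long)std::ceil(Src); }

// Shape of the code emitVFCVT_RM_MASK emits around each conversion:
long convertWithTemporaryFRM(unsigned Mode, double Src) {
  unsigned SavedFRM = swapFRM(Mode); // fsrmi savedFRM, Mode
  long Result = vfcvtXF(Src);        // vfcvt.x.f.v vd, vs2, v0.t
  writeFRM(SavedFRM);                // fsrm savedFRM
  return Result;
}

Saving the old mode into a fresh virtual register and restoring it immediately after the conversion keeps the FRM write local, so code after the VFCVT still observes the caller's rounding mode.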
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index dcaa7f24b4c8d..2803d26d94544 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -254,6 +254,7 @@ enum NodeType : unsigned {
FCOPYSIGN_VL, // Has a merge operand
FP_TO_SINT_VL,
FP_TO_UINT_VL,
+ VFCVT_X_F_VL,
SINT_TO_FP_VL,
UINT_TO_FP_VL,
FP_ROUND_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 4a1acd5a47c1a..be420440ac64d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1555,6 +1555,7 @@ class SwapSysRegImm<SysReg SR, list<Register> Regs>
def ReadFRM : ReadSysReg<SysRegFRM, [FRM]>;
def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;
+def SwapFRMImm : SwapSysRegImm<SysRegFRM, [FRM]>;
let hasSideEffects = true in {
def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 782b20e5f0f61..420b432f917ee 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1031,6 +1031,22 @@ class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
let UsesMaskPolicy = 1;
}
+class VPseudoUnaryMaskTA_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let UsesMaskPolicy = 1;
+ let usesCustomInserter = 1;
+}
+
// mask unary operation without maskedoff
class VPseudoMaskUnarySOutMask:
Pseudo<(outs GPR:$rd),
@@ -2769,12 +2785,28 @@ multiclass VPseudoConversion<VReg RetClass,
}
}
+multiclass VPseudoConversionRM<VReg RetClass,
+ VReg Op1Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA_FRM<RetClass, Op1Class,
+ Constraint>;
+ }
+}
+
multiclass VPseudoVCVTI_V {
foreach m = MxListF in
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
}
+multiclass VPseudoVCVTI_RM_V {
+ foreach m = MxListF in
+ defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
+ Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
multiclass VPseudoVCVTF_V {
foreach m = MxListF in
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
@@ -4849,6 +4881,7 @@ defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
}
defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
let Uses = [FRM] in {
defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 44e977c232bc9..71fda8b925b96 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -146,6 +146,14 @@ def riscv_fp_to_uint_vl : SDNode<"RISCVISD::FP_TO_UINT_VL", SDT_RISCVFP2IOp_VL>;
def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
+def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
+ SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
+ SDTCisVT<4, XLenVT>
+]>;
+
+def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVVecCvtX2FOp_VL>;
+
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
SDTCisVec<1>,
@@ -645,6 +653,19 @@ multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+ foreach fvti = AllFloatVectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0), (XLenVT timm:$frm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+}
+
multiclass VPatConvertI2FPVL_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
@@ -1471,6 +1492,7 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
+ defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_fp_to_sint_vl, "PseudoVFCVT_RTZ_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_fp_to_uint_vl, "PseudoVFCVT_RTZ_XU_F_V">;
defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
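In other words, the new node carries four operands (the FP source, the mask, the FRM immediate as an XLenVT target constant, and the VL); that is what SDT_RISCVVecCvtX2FOp_VL encodes and what the VPatConvertFP2I_RM_VL_V pattern above destructures when selecting the matching _MASK pseudo with a TAIL_AGNOSTIC policy.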
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 59b7cb608b9f4..6edabdfff4b3d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -264,38 +264,38 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
// instruction counts with the following adjustments made:
// * One vsetvli is considered free.
static const CostTblEntry VectorIntrinsicCostTable[]{
- {Intrinsic::floor, MVT::v2f32, 15},
- {Intrinsic::floor, MVT::v4f32, 15},
- {Intrinsic::floor, MVT::v8f32, 15},
- {Intrinsic::floor, MVT::v16f32, 15},
- {Intrinsic::floor, MVT::nxv2f32, 15},
- {Intrinsic::floor, MVT::nxv4f32, 15},
- {Intrinsic::floor, MVT::nxv8f32, 15},
- {Intrinsic::floor, MVT::nxv16f32, 15},
- {Intrinsic::floor, MVT::v2f64, 15},
- {Intrinsic::floor, MVT::v4f64, 15},
- {Intrinsic::floor, MVT::v8f64, 15},
- {Intrinsic::floor, MVT::v16f64, 15},
- {Intrinsic::floor, MVT::nxv1f64, 15},
- {Intrinsic::floor, MVT::nxv2f64, 15},
- {Intrinsic::floor, MVT::nxv4f64, 15},
- {Intrinsic::floor, MVT::nxv8f64, 15},
- {Intrinsic::ceil, MVT::v2f32, 15},
- {Intrinsic::ceil, MVT::v4f32, 15},
- {Intrinsic::ceil, MVT::v8f32, 15},
- {Intrinsic::ceil, MVT::v16f32, 15},
- {Intrinsic::ceil, MVT::nxv2f32, 15},
- {Intrinsic::ceil, MVT::nxv4f32, 15},
- {Intrinsic::ceil, MVT::nxv8f32, 15},
- {Intrinsic::ceil, MVT::nxv16f32, 15},
- {Intrinsic::ceil, MVT::v2f64, 15},
- {Intrinsic::ceil, MVT::v4f64, 15},
- {Intrinsic::ceil, MVT::v8f64, 15},
- {Intrinsic::ceil, MVT::v16f64, 15},
- {Intrinsic::ceil, MVT::nxv1f64, 15},
- {Intrinsic::ceil, MVT::nxv2f64, 15},
- {Intrinsic::ceil, MVT::nxv4f64, 15},
- {Intrinsic::ceil, MVT::nxv8f64, 15},
+ {Intrinsic::floor, MVT::v2f32, 9},
+ {Intrinsic::floor, MVT::v4f32, 9},
+ {Intrinsic::floor, MVT::v8f32, 9},
+ {Intrinsic::floor, MVT::v16f32, 9},
+ {Intrinsic::floor, MVT::nxv2f32, 9},
+ {Intrinsic::floor, MVT::nxv4f32, 9},
+ {Intrinsic::floor, MVT::nxv8f32, 9},
+ {Intrinsic::floor, MVT::nxv16f32, 9},
+ {Intrinsic::floor, MVT::v2f64, 9},
+ {Intrinsic::floor, MVT::v4f64, 9},
+ {Intrinsic::floor, MVT::v8f64, 9},
+ {Intrinsic::floor, MVT::v16f64, 9},
+ {Intrinsic::floor, MVT::nxv1f64, 9},
+ {Intrinsic::floor, MVT::nxv2f64, 9},
+ {Intrinsic::floor, MVT::nxv4f64, 9},
+ {Intrinsic::floor, MVT::nxv8f64, 9},
+ {Intrinsic::ceil, MVT::v2f32, 9},
+ {Intrinsic::ceil, MVT::v4f32, 9},
+ {Intrinsic::ceil, MVT::v8f32, 9},
+ {Intrinsic::ceil, MVT::v16f32, 9},
+ {Intrinsic::ceil, MVT::nxv2f32, 9},
+ {Intrinsic::ceil, MVT::nxv4f32, 9},
+ {Intrinsic::ceil, MVT::nxv8f32, 9},
+ {Intrinsic::ceil, MVT::nxv16f32, 9},
+ {Intrinsic::ceil, MVT::v2f64, 9},
+ {Intrinsic::ceil, MVT::v4f64, 9},
+ {Intrinsic::ceil, MVT::v8f64, 9},
+ {Intrinsic::ceil, MVT::v16f64, 9},
+ {Intrinsic::ceil, MVT::nxv1f64, 9},
+ {Intrinsic::ceil, MVT::nxv2f64, 9},
+ {Intrinsic::ceil, MVT::nxv4f64, 9},
+ {Intrinsic::ceil, MVT::nxv8f64, 9},
{Intrinsic::trunc, MVT::v2f32, 7},
{Intrinsic::trunc, MVT::v4f32, 7},
{Intrinsic::trunc, MVT::v8f32, 7},
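The new cost of 9 lines up with counting the updated sequence in the tests below: lui, flh/flw/fld, vsetvli, vfabs.v, vmflt.vf, fsrmi, vfcvt.x.f.v, fsrm, vfcvt.f.x.v, and vfsgnj.vv come to ten instructions, and the one vsetvli is treated as free.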
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index f19b2fd964080..d4bd192ade7c3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -4,23 +4,23 @@
define void @floor() {
; CHECK-LABEL: 'floor'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.floor.f32(float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.floor.f64(double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call float @llvm.floor.f32(float undef)
@@ -47,23 +47,23 @@ define void @floor() {
define void @ceil() {
; CHECK-LABEL: 'ceil'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.ceil.f32(float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.ceil.f64(double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %15 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %18 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call float @llvm.ceil.f32(float undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
index 41e57ddf2173b..fb944b4d87e0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -11,18 +11,12 @@ define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
@@ -36,18 +30,12 @@ define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
@@ -61,18 +49,12 @@ define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
@@ -85,19 +67,13 @@ define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
ret <vscale x 8 x half> %a
@@ -110,19 +86,13 @@ define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
ret <vscale x 16 x half> %a
@@ -135,19 +105,13 @@ define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
@@ -161,18 +125,12 @@ define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
ret <vscale x 1 x float> %a
@@ -186,18 +144,12 @@ define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x float> %a
@@ -210,19 +162,13 @@ define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
ret <vscale x 4 x float> %a
@@ -235,19 +181,13 @@ define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
ret <vscale x 8 x float> %a
@@ -260,19 +200,13 @@ define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
ret <vscale x 16 x float> %a
@@ -286,18 +220,12 @@ define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
ret <vscale x 1 x double> %a
@@ -310,19 +238,13 @@ define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
ret <vscale x 2 x double> %a
@@ -335,19 +257,13 @@ define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
ret <vscale x 4 x double> %a
@@ -360,19 +276,13 @@ define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
ret <vscale x 8 x double> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
index 52fc667aad842..84daa21a499fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -11,18 +11,12 @@ define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
@@ -36,18 +30,12 @@ define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
@@ -61,18 +49,12 @@ define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
@@ -85,19 +67,13 @@ define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x)
ret <vscale x 8 x half> %a
@@ -110,19 +86,13 @@ define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x)
ret <vscale x 16 x half> %a
@@ -135,19 +105,13 @@ define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 32 x half> @llvm.floor.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
@@ -161,18 +125,12 @@ define <vscale x 1 x float> @floor_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv1r.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> %x)
ret <vscale x 1 x float> %a
@@ -186,18 +144,12 @@ define <vscale x 2 x float> @floor_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
ret <vscale x 2 x float> %a
@@ -210,19 +162,13 @@ define <vscale x 4 x float> @floor_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> %x)
ret <vscale x 4 x float> %a
@@ -235,19 +181,13 @@ define <vscale x 8 x float> @floor_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> %x)
ret <vscale x 8 x float> %a
@@ -260,19 +200,13 @@ define <vscale x 16 x float> @floor_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> %x)
ret <vscale x 16 x float> %a
@@ -286,18 +220,12 @@ define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v9, v9, ft0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v10, v9
-; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
ret <vscale x 1 x double> %a
@@ -310,19 +238,13 @@ define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v10, v12, ft0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vmv1r.v v11, v10
-; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfabs.v v10, v8
+; CHECK-NEXT: vmflt.vf v0, v10, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> %x)
ret <vscale x 2 x double> %a
@@ -335,19 +257,13 @@ define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v12, v16, ft0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v13, v12
-; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: vmflt.vf v0, v12, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> %x)
ret <vscale x 4 x double> %a
@@ -360,19 +276,13 @@ define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfabs.v v24, v8
-; CHECK-NEXT: vmflt.vf v16, v24, ft0
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
-; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: vmv1r.v v17, v16
-; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vfabs.v v16, v8
+; CHECK-NEXT: vmflt.vf v0, v16, ft0
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> %x)
ret <vscale x 8 x double> %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 44a5a15cc215e..3986bc2d071db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -2030,23 +2030,17 @@ define void @ceil_v8f16(<8 x half>* %x) {
; CHECK-LABEL: ceil_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI94_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI94_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI94_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x half>, <8 x half>* %x
%b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
@@ -2059,23 +2053,17 @@ define void @ceil_v4f32(<4 x float>* %x) {
; CHECK-LABEL: ceil_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI95_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI95_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI95_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse32.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
@@ -2088,23 +2076,17 @@ define void @ceil_v2f64(<2 x double>* %x) {
; CHECK-LABEL: ceil_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vle64.v v10, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI96_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI96_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI96_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
@@ -2117,23 +2099,17 @@ define void @floor_v8f16(<8 x half>* %x) {
; CHECK-LABEL: floor_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI97_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI97_1)
-; CHECK-NEXT: flh ft0, %lo(.LCPI97_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x half>, <8 x half>* %x
%b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
@@ -2146,23 +2122,17 @@ define void @floor_v4f32(<4 x float>* %x) {
; CHECK-LABEL: floor_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI98_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI98_1)
-; CHECK-NEXT: flw ft0, %lo(.LCPI98_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse32.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
@@ -2175,23 +2145,17 @@ define void @floor_v2f64(<2 x double>* %x) {
; CHECK-LABEL: floor_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vle64.v v10, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI99_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1)
-; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: vmflt.vf v8, v8, ft0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
-; CHECK-NEXT: lui a1, %hi(.LCPI99_1)
-; CHECK-NEXT: fld ft0, %lo(.LCPI99_1)(a1)
-; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vse64.v v10, (a0)
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, ft0
+; CHECK-NEXT: fsrmi a1, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
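For reference, every updated check sequence above reduces to the same bracket: build a mask of lanes that still need rounding, swap FRM to the wanted static mode around a dynamic-rounding-mode convert, then restore it. A representative sketch distilled from the floor CHECK lines (not verbatim from any one test: register numbers vary with LMUL, and the ft0 threshold constant, 2^(mantissa bits), is loaded from the .LCPI constant pool):

        vfabs.v     v9, v8           # |x|
        vmflt.vf    v0, v9, ft0      # mask: |x| < 2^(mantissa bits), lanes not yet
                                     # integral (NaNs compare false, so stay inactive)
        fsrmi       a0, 2            # save old FRM in a0, set FRM=2 (RDN); ceil uses 3 (RUP)
        vfcvt.x.f.v v9, v8, v0.t     # FP -> int under the dynamic rounding mode, masked
        fsrm        a0               # restore the original FRM
        vfcvt.f.x.v v9, v9, v0.t     # int -> FP, masked
        vfsgnj.vv   v8, v9, v8, v0.t # reattach the source's sign; inactive lanes (NaN or
                                     # already integral) keep v8 via the mu policy

This replaces the old two-constant-pool-load, compare-and-adjust sequence with a single rounding-mode swap, which is where the instruction-count savings in these diffs come from.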