[llvm] a0d80c2 - [RISCV] Generalize performFP_TO_INTCombine to vectors
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 18 02:53:44 PST 2023
Author: Luke Lau
Date: 2023-01-18T10:53:24Z
New Revision: a0d80c239840ec477dd23071e748e60205ffd6b7
URL: https://github.com/llvm/llvm-project/commit/a0d80c239840ec477dd23071e748e60205ffd6b7
DIFF: https://github.com/llvm/llvm-project/commit/a0d80c239840ec477dd23071e748e60205ffd6b7.diff
LOG: [RISCV] Generalize performFP_TO_INTCombine to vectors
As in the scalar domain, combine (fp_to_int (ftrunc X)) and similar
rounding patterns on scalable and fixed-length vectors into a single
vfcvt instruction. For truncating rounds, the static vfcvt.rtz rounding
mode is used; otherwise the VFCVT_RM_ variants are used to set the
rounding mode dynamically.
Closes https://github.com/llvm/llvm-project/issues/56737
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D141599
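For illustration, a minimal IR sketch of the pattern this combine now
catches, adapted from the updated tests below (the exact function names
and CHECK lines are in the diff):

  ; adapted from llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
  declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)

  define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
    %a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
    %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
    ret <vscale x 1 x i64> %b
  }

With this change the trunc+fptosi pair lowers to a single
vfcvt.rtz.x.f.v, while ceil/floor-style rounds select the new
VFCVT_RM_* nodes and bracket the conversion with fsrmi/fsrm, as shown
in the updated CHECK lines.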
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 315c10887a83..de3dcc29b78c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1915,7 +1915,8 @@ getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
-// the vector type that it is contained in.
+// the vector type that the fixed-length vector is contained in. Otherwise if
+// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@@ -9555,15 +9556,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT XLenVT = Subtarget.getXLenVT();
- // Only handle XLen or i32 types. Other types narrower than XLen will
- // eventually be legalized to XLenVT.
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != XLenVT)
- return SDValue();
-
SDValue Src = N->getOperand(0);
- // Ensure the FP type is also legal.
+ // Ensure the FP type is legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
@@ -9575,7 +9570,57 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
if (FRM == RISCVFPRndMode::Invalid)
return SDValue();
+ SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() && TLI.isTypeLegal(VT)) {
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT SrcContainerVT = SrcVT;
+ MVT ContainerVT = VT.getSimpleVT();
+ SDValue XVal = Src.getOperand(0);
+
+ // TODO: Support combining with widening and narrowing instructions
+ // For now only support conversions of the same bit size
+ if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+ return SDValue();
+
+ // Make fixed-length vectors scalable first
+ if (SrcVT.isFixedLengthVector()) {
+ SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+ XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
+ ContainerVT =
+ getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+ }
+
+ auto [Mask, VL] =
+ getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+ SDValue FpToInt;
+ if (FRM == RISCVFPRndMode::RTZ) {
+ // Use the dedicated trunc static rounding mode if we're truncating so we
+ // don't need to generate calls to fsrmi/fsrm
+ unsigned Opc =
+ IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+ FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+ } else {
+ unsigned Opc =
+ IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
+ FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
+ DAG.getTargetConstant(FRM, DL, XLenVT), VL);
+ }
+
+ // If converted from fixed-length to scalable, convert back
+ if (VT.isFixedLengthVector())
+ FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
+
+ return FpToInt;
+ }
+
+ // Only handle XLen or i32 types. Other types narrower than XLen will
+ // eventually be legalized to XLenVT.
+ if (VT != MVT::i32 && VT != XLenVT)
+ return SDValue();
unsigned Opc;
if (VT == XLenVT)
@@ -9583,7 +9628,6 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
else
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
- SDLoc DL(N);
SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
DAG.getTargetConstant(FRM, DL, XLenVT));
return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
@@ -11604,6 +11648,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
+ case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
@@ -13218,7 +13274,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
NODE_NAME_CASE(VFCVT_RM_X_F_VL)
+ NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
NODE_NAME_CASE(VFCVT_X_F_VL)
+ NODE_NAME_CASE(VFCVT_XU_F_VL)
NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 91f1a0f2e343..7c77f9c2fe8b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -239,8 +239,10 @@ enum NodeType : unsigned {
VFCVT_RTZ_X_F_VL,
VFCVT_RTZ_XU_F_VL,
VFCVT_X_F_VL,
+ VFCVT_XU_F_VL,
VFROUND_NOEXCEPT_VL,
VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+ VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
SINT_TO_FP_VL,
UINT_TO_FP_VL,
VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index f2d22048babc..4d52730d1dd3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5512,6 +5512,7 @@ defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
}
defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
let Uses = [FRM] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index bbb55f8ef257..8b26e6a2475b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -159,7 +159,9 @@ def SDT_RISCVVecCvtF2XOp_VL : SDTypeProfile<1, 4, [
]>;
def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
@@ -1739,7 +1741,9 @@ foreach fvti = AllFloatVectors in {
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
+ defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+ defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
index 7f8644edfef2..de270fce84ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -251,29 +251,13 @@ define <vscale x 1 x i32> @trunc_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI6_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI6_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI6_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
@@ -284,29 +268,13 @@ define <vscale x 1 x i64> @trunc_nxv1f64_to_si64(<vscale x 1 x double> %x) {
define <vscale x 1 x i64> @trunc_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI7_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI7_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> %x)
@@ -561,29 +529,13 @@ define <vscale x 4 x i32> @trunc_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
define <vscale x 4 x i64> @trunc_nxv4f64_to_si64(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI14_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
@@ -594,29 +546,13 @@ define <vscale x 4 x i64> @trunc_nxv4f64_to_si64(<vscale x 4 x double> %x) {
define <vscale x 4 x i64> @trunc_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI15_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI15_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI15_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> %x)
@@ -895,34 +831,20 @@ define <vscale x 1 x i32> @ceil_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI22_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI22_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI22_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI22_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
%b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
@@ -932,34 +854,20 @@ define <vscale x 1 x i64> @ceil_nxv1f64_to_si64(<vscale x 1 x double> %x) {
define <vscale x 1 x i64> @ceil_nxv1f64_to_ui64(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI23_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI23_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI23_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI23_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
%b = fptoui <vscale x 1 x double> %a to <vscale x 1 x i64>
@@ -1237,34 +1145,20 @@ define <vscale x 4 x i32> @ceil_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
define <vscale x 4 x i64> @ceil_nxv4f64_to_si64(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI30_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI30_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI30_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
%b = fptosi <vscale x 4 x double> %a to <vscale x 4 x i64>
@@ -1274,34 +1168,20 @@ define <vscale x 4 x i64> @ceil_nxv4f64_to_si64(<vscale x 4 x double> %x) {
define <vscale x 4 x i64> @ceil_nxv4f64_to_ui64(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI31_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI31_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI31_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI31_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
%b = fptoui <vscale x 4 x double> %a to <vscale x 4 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
index 1c3612651fe1..334bc29e2370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -165,29 +165,13 @@ define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI4_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI4_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
@@ -198,29 +182,13 @@ define <vscale x 1 x i32> @trunc_nxv1f32_to_si32(<vscale x 1 x float> %x) {
define <vscale x 1 x i32> @trunc_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI5_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> %x)
@@ -459,29 +427,13 @@ define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI12_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI12_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI12_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI12_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
@@ -492,29 +444,13 @@ define <vscale x 4 x i32> @trunc_nxv4f32_to_si32(<vscale x 4 x float> %x) {
define <vscale x 4 x i32> @trunc_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI13_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI13_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %x)
@@ -769,34 +705,20 @@ define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI20_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI20_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI20_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI20_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
%b = fptosi <vscale x 1 x float> %a to <vscale x 1 x i32>
@@ -806,34 +728,20 @@ define <vscale x 1 x i32> @ceil_nxv1f32_to_si32(<vscale x 1 x float> %x) {
define <vscale x 1 x i32> @ceil_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI21_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI21_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI21_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI21_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
%b = fptoui <vscale x 1 x float> %a to <vscale x 1 x i32>
@@ -1095,34 +1003,20 @@ define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI28_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI28_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptosi <vscale x 4 x float> %a to <vscale x 4 x i32>
@@ -1132,34 +1026,20 @@ define <vscale x 4 x i32> @ceil_nxv4f32_to_si32(<vscale x 4 x float> %x) {
define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI29_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI29_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI29_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI29_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
%b = fptoui <vscale x 4 x float> %a to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index e48887488428..8a249b4272a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -87,29 +87,13 @@ define <vscale x 1 x i8> @trunc_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
define <vscale x 1 x i16> @trunc_nxv1f16_to_si16(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI2_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI2_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
@@ -120,29 +104,13 @@ define <vscale x 1 x i16> @trunc_nxv1f16_to_si16(<vscale x 1 x half> %x) {
define <vscale x 1 x i16> @trunc_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
@@ -377,29 +345,13 @@ define <vscale x 4 x i8> @trunc_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
define <vscale x 4 x i16> @trunc_nxv4f16_to_si16(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI10_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
@@ -410,29 +362,13 @@ define <vscale x 4 x i16> @trunc_nxv4f16_to_si16(<vscale x 4 x half> %x) {
define <vscale x 4 x i16> @trunc_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI11_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI11_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI11_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI11_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
@@ -675,34 +611,20 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
define <vscale x 1 x i16> @ceil_nxv1f16_to_si16(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI18_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI18_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI18_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI18_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
%b = fptosi <vscale x 1 x half> %a to <vscale x 1 x i16>
@@ -712,34 +634,20 @@ define <vscale x 1 x i16> @ceil_nxv1f16_to_si16(<vscale x 1 x half> %x) {
define <vscale x 1 x i16> @ceil_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI19_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI19_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI19_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI19_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
%b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i16>
@@ -997,34 +905,20 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
define <vscale x 4 x i16> @ceil_nxv4f16_to_si16(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI26_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI26_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI26_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI26_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptosi <vscale x 4 x half> %a to <vscale x 4 x i16>
@@ -1034,34 +928,20 @@ define <vscale x 4 x i16> @ceil_nxv4f16_to_si16(<vscale x 4 x half> %x) {
define <vscale x 4 x i16> @ceil_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI27_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI27_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI27_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI27_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
%b = fptoui <vscale x 4 x half> %a to <vscale x 4 x i16>