[llvm] f5a6447 - [RISCV] Combine FP_TO_INT to vfwcvt/vfncvt
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 24 01:45:03 PST 2023
Author: Luke Lau
Date: 2023-01-24T09:44:57Z
New Revision: f5a644719657c25bdb298c17374578d2e9202197
URL: https://github.com/llvm/llvm-project/commit/f5a644719657c25bdb298c17374578d2e9202197
DIFF: https://github.com/llvm/llvm-project/commit/f5a644719657c25bdb298c17374578d2e9202197.diff
LOG: [RISCV] Combine FP_TO_INT to vfwcvt/vfncvt
Adds new pseudo instructions so that the vector fcvt instructions have
all rounding mode (RM) and unsigned (XU) variants across single-width,
widening and narrowing conversions, and likewise extends the VL
patterns to accompany them. We don't add new VL nodes for the
widening/narrowing conversions; instead we add specific patterns that
select the existing vfcvt VL nodes on those wider/narrower types.
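For example, a ceil followed by an fptosi from <vscale x 1 x double>
to <vscale x 1 x i32> (see the double-round-conv.ll changes below) is
now selected to a single masked vfncvt.x.f.w with a statically set
rounding mode; roughly (the IR here is a sketch of that test):

  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i32>

  vsetvli a0, zero, e32, mf2, ta, ma
  vmset.m v0
  fsrmi a0, 3
  vfncvt.x.f.w v9, v8, v0.t
  fsrm a0
  vmv1r.v v8, v9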
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D142102
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 227ffeba44846..23e00b916b984 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9580,9 +9580,12 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
MVT ContainerVT = VT.getSimpleVT();
SDValue XVal = Src.getOperand(0);
- // TODO: Support combining with widening and narrowing instructions
- // For now only support conversions of the same bit size
- if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+ // For widening and narrowing conversions we just combine it into a
+ // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
+ // end up getting lowered to their appropriate pseudo instructions based on
+ // their operand types
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
+ VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
return SDValue();
// Make fixed-length vectors scalable first
@@ -11638,6 +11641,11 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
case RISCV::PseudoQuietFLT_D:
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
+
+ // =========================================================================
+ // VFCVT
+ // =========================================================================
+
case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
@@ -11650,6 +11658,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+
case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
@@ -11662,6 +11671,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
+
case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
@@ -11674,6 +11684,102 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
+
+ case RISCV::PseudoVFCVT_RM_F_X_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M1_MASK);
+ case RISCV::PseudoVFCVT_RM_F_X_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M2_MASK);
+ case RISCV::PseudoVFCVT_RM_F_X_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M4_MASK);
+ case RISCV::PseudoVFCVT_RM_F_X_V_M8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M8_MASK);
+ case RISCV::PseudoVFCVT_RM_F_X_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
+ case RISCV::PseudoVFCVT_RM_F_X_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
+
+ // =========================================================================
+ // VFWCVT
+ // =========================================================================
+
+ case RISCV::PseudoVFWCVT_RM_XU_F_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
+ case RISCV::PseudoVFWCVT_RM_XU_F_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
+ case RISCV::PseudoVFWCVT_RM_XU_F_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
+ case RISCV::PseudoVFWCVT_RM_XU_F_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
+ case RISCV::PseudoVFWCVT_RM_XU_F_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
+
+ case RISCV::PseudoVFWCVT_RM_X_F_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
+ case RISCV::PseudoVFWCVT_RM_X_F_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
+ case RISCV::PseudoVFWCVT_RM_X_F_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
+ case RISCV::PseudoVFWCVT_RM_X_F_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
+ case RISCV::PseudoVFWCVT_RM_X_F_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
+
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_XU_V_MF8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
+
+ case RISCV::PseudoVFWCVT_RM_F_X_V_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_X_V_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_X_V_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_X_V_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_X_V_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
+ case RISCV::PseudoVFWCVT_RM_F_X_V_MF8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
+
+ // =========================================================================
+ // VFNCVT
+ // =========================================================================
+
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
+ case RISCV::PseudoVFNCVT_RM_XU_F_W_MF8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF8_MASK);
+
+ case RISCV::PseudoVFNCVT_RM_X_F_W_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
+ case RISCV::PseudoVFNCVT_RM_X_F_W_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
+ case RISCV::PseudoVFNCVT_RM_X_F_W_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
+ case RISCV::PseudoVFNCVT_RM_X_F_W_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
+ case RISCV::PseudoVFNCVT_RM_X_F_W_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
+ case RISCV::PseudoVFNCVT_RM_X_F_W_MF8_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF8_MASK);
+
case RISCV::PseudoVFNCVT_RM_F_XU_W_M1_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
case RISCV::PseudoVFNCVT_RM_F_XU_W_M2_MASK:
@@ -11684,6 +11790,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
case RISCV::PseudoVFNCVT_RM_F_XU_W_MF4_MASK:
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
+
+ case RISCV::PseudoVFNCVT_RM_F_X_W_M1_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
+ case RISCV::PseudoVFNCVT_RM_F_X_W_M2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M2_MASK);
+ case RISCV::PseudoVFNCVT_RM_F_X_W_M4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M4_MASK);
+ case RISCV::PseudoVFNCVT_RM_F_X_W_MF2_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
+ case RISCV::PseudoVFNCVT_RM_F_X_W_MF4_MASK:
+ return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
+
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -13283,6 +13401,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)
NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
+ NODE_NAME_CASE(VFCVT_RM_F_X_VL)
NODE_NAME_CASE(FP_EXTEND_VL)
NODE_NAME_CASE(FP_ROUND_VL)
NODE_NAME_CASE(VWMUL_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7c77f9c2fe8b0..acf92cab35984 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ enum NodeType : unsigned {
VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
SINT_TO_FP_VL,
UINT_TO_FP_VL,
+ VFCVT_RM_F_X_VL, // Has a rounding mode operand.
VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
FP_ROUND_VL,
FP_EXTEND_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 4d52730d1dd39..3f69b5e41cf11 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3435,6 +3435,18 @@ multiclass VPseudoVWCVTI_V {
}
}
+multiclass VPseudoVWCVTI_RM_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ defvar mx = m.MX;
+ defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
+ defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
+
+ defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
@@ -3447,6 +3459,18 @@ multiclass VPseudoVWCVTF_V {
}
}
+multiclass VPseudoVWCVTF_RM_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in {
+ defvar mx = m.MX;
+ defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
+ defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
+
+ defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
@@ -3471,6 +3495,18 @@ multiclass VPseudoVNCVTI_W {
}
}
+multiclass VPseudoVNCVTI_RM_W {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in {
+ defvar mx = m.MX;
+ defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
+ defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
+
+ defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoVNCVTF_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
@@ -5510,16 +5546,20 @@ let Uses = [FRM] in {
defm PseudoVFCVT_XU_F : VPseudoVCVTI_V;
defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
}
-defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
-defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+
defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
+
+defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+
defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
let Uses = [FRM] in {
defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
}
defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V;
+defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V;
} // mayRaiseFPException = true
//===----------------------------------------------------------------------===//
@@ -5530,10 +5570,19 @@ let Uses = [FRM] in {
defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V;
defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V;
}
+defm PseudoVFWCVT_RM_XU_F : VPseudoVWCVTI_RM_V;
+defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V;
+
defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V;
defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V;
+
+let Uses = [FRM] in {
defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
+}
+defm PseudoVFWCVT_RM_F_XU : VPseudoVWCVTF_RM_V;
+defm PseudoVFWCVT_RM_F_X : VPseudoVWCVTF_RM_V;
+
defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
} // mayRaiseFPException = true
@@ -5545,15 +5594,23 @@ let Uses = [FRM] in {
defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W;
defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W;
}
+defm PseudoVFNCVT_RM_XU_F : VPseudoVNCVTI_RM_W;
+defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W;
+
defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W;
defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W;
+
let Uses = [FRM] in {
defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W;
defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W;
-defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
}
-defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W;
+defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W;
+
+let Uses = [FRM] in
+defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
+
+defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
} // Predicates = [HasVInstructionsAnyF]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 8b26e6a2475b4..451962daeada1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -136,6 +136,12 @@ def SDT_RISCVFP2IOp_VL : SDTypeProfile<1, 3, [
SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
]>;
+def SDT_RISCVFP2IOp_RM_VL : SDTypeProfile<1, 4, [
+ SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
+ SDTCisVT<4, XLenVT> // Rounding mode
+]>;
+
def SDT_RISCVI2FPOp_VL : SDTypeProfile<1, 3, [
SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>,
SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
@@ -143,25 +149,25 @@ def SDT_RISCVI2FPOp_VL : SDTypeProfile<1, 3, [
def SDT_RISCVI2FPOp_RM_VL : SDTypeProfile<1, 4, [
SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>,
SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
- SDTCisVT<4, XLenVT>
+ SDTCisVT<4, XLenVT> // Rounding mode
]>;
-def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
+// Float -> Int
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVFP2IOp_RM_VL>;
+def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVFP2IOp_RM_VL>;
+
def riscv_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
+
+// Int -> Float
def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_vfcvt_rm_f_xu_vl : SDNode<"RISCVISD::VFCVT_RM_F_XU_VL", SDT_RISCVI2FPOp_RM_VL>;
+def riscv_vfcvt_rm_f_x_vl : SDNode<"RISCVISD::VFCVT_RM_F_X_VL", SDT_RISCVI2FPOp_RM_VL>;
-def SDT_RISCVVecCvtF2XOp_VL : SDTypeProfile<1, 4, [
- SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
- SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
- SDTCisVT<4, XLenVT>
-]>;
-def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
-def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
-def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
-def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
@@ -767,6 +773,8 @@ multiclass VPatExtendVL_V<SDNode vop, string inst_name, string suffix,
}
}
+// Single width converting
+
multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
@@ -816,6 +824,8 @@ multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
}
}
+// Widening converting
+
multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
@@ -829,6 +839,19 @@ multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatWConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0), (XLenVT timm:$frm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ }
+}
+
multiclass VPatWConvertI2FPVL_V<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar ivti = vtiToWti.Vti;
@@ -842,7 +865,24 @@ multiclass VPatWConvertI2FPVL_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatWConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in {
+ defvar ivti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
+ (ivti.Mask V0), (XLenVT timm:$frm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+ (ivti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW, TA_MA)>;
+ }
+}
+
+// Narrowing converting
+
multiclass VPatNConvertFP2IVL_V<SDNode vop, string instruction_name> {
+ // Reuse the same list of types used in the widening nodes, but just swap the
+ // direction of types around so we're converting from Wti -> Vti
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
@@ -855,6 +895,19 @@ multiclass VPatNConvertFP2IVL_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatNConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0), (XLenVT timm:$frm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>;
+ }
+}
+
multiclass VPatNConvertI2FPVL_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
@@ -1740,21 +1793,35 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
- defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
- defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
+ defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+ defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
+ defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
+
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
- defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
+ defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
+
defm : VPatConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
+ defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
+
defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU_V">;
+ defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFCVT_RM_F_X_V">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
- defm : VPatWConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFWCVT_RTZ_X_F_V">;
+ defm : VPatWConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFWCVT_XU_F_V">;
+ defm : VPatWConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFWCVT_X_F_V">;
+ defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFWCVT_RM_XU_F_V">;
+ defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFWCVT_RM_X_F_V">;
+
defm : VPatWConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFWCVT_RTZ_XU_F_V">;
- defm : VPatWConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
+ defm : VPatWConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFWCVT_RTZ_X_F_V">;
+
defm : VPatWConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFWCVT_F_XU_V">;
+ defm : VPatWConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
+
+ defm : VPatWConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFWCVT_RM_F_XU_V">;
+ defm : VPatWConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFWCVT_RM_F_X_V">;
+
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -1767,12 +1834,20 @@ foreach fvti = AllFloatVectors in {
}
// 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
- defm : VPatNConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFNCVT_RTZ_X_F_W">;
+ defm : VPatNConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
+ defm : VPatNConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
+ defm : VPatNConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFNCVT_RM_XU_F_W">;
+ defm : VPatNConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFNCVT_RM_X_F_W">;
+
defm : VPatNConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFNCVT_RTZ_XU_F_W">;
- defm : VPatNConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFNCVT_F_X_W">;
+ defm : VPatNConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFNCVT_RTZ_X_F_W">;
+
defm : VPatNConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFNCVT_F_XU_W">;
- defm :
- VPatNConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_RM_F_XU_W">;
+ defm : VPatNConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFNCVT_F_X_W">;
+
+ defm : VPatNConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_RM_F_XU_W">;
+ defm : VPatNConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
+
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
index de270fce84ea0..b2eadd45ea5f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -177,32 +177,14 @@ define <vscale x 1 x i16> @trunc_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
define <vscale x 1 x i32> @trunc_nxv1f64_to_si32(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI4_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI4_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI4_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -214,32 +196,14 @@ define <vscale x 1 x i32> @trunc_nxv1f64_to_si32(<vscale x 1 x double> %x) {
define <vscale x 1 x i32> @trunc_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
; RV32-LABEL: trunc_nxv1f64_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI5_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f64_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI5_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI5_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -455,32 +419,14 @@ define <vscale x 4 x i16> @trunc_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
define <vscale x 4 x i32> @trunc_nxv4f64_to_si32(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI12_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI12_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -492,32 +438,14 @@ define <vscale x 4 x i32> @trunc_nxv4f64_to_si32(<vscale x 4 x double> %x) {
define <vscale x 4 x i32> @trunc_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
; RV32-LABEL: trunc_nxv4f64_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f64_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -749,37 +677,21 @@ define <vscale x 1 x i16> @ceil_nxv1f64_to_ui16(<vscale x 1 x double> %x) {
define <vscale x 1 x i32> @ceil_nxv1f64_to_si32(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI20_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI20_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI20_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI20_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
@@ -790,37 +702,21 @@ define <vscale x 1 x i32> @ceil_nxv1f64_to_si32(<vscale x 1 x double> %x) {
define <vscale x 1 x i32> @ceil_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
; RV32-LABEL: ceil_nxv1f64_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI21_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI21_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f64_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI21_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI21_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
@@ -1063,37 +959,21 @@ define <vscale x 4 x i16> @ceil_nxv4f64_to_ui16(<vscale x 4 x double> %x) {
define <vscale x 4 x i32> @ceil_nxv4f64_to_si32(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI28_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI28_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v12, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI28_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI28_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v12, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
@@ -1104,37 +984,21 @@ define <vscale x 4 x i32> @ceil_nxv4f64_to_si32(<vscale x 4 x double> %x) {
define <vscale x 4 x i32> @ceil_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
; RV32-LABEL: ceil_nxv4f64_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI29_0)
-; RV32-NEXT: fld ft0, %lo(.LCPI29_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT: vfabs.v v12, v8
-; RV32-NEXT: vmflt.vf v0, v12, ft0
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v12, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f64_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI29_0)
-; RV64-NEXT: fld ft0, %lo(.LCPI29_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT: vfabs.v v12, v8
-; RV64-NEXT: vmflt.vf v0, v12, ft0
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v12, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
index 334bc29e23708..7b4a9206a7faf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -91,32 +91,14 @@ define <vscale x 1 x i8> @trunc_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
define <vscale x 1 x i16> @trunc_nxv1f32_to_si16(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI2_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI2_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI2_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -128,32 +110,14 @@ define <vscale x 1 x i16> @trunc_nxv1f32_to_si16(<vscale x 1 x float> %x) {
define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI3_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -199,30 +163,14 @@ define <vscale x 1 x i32> @trunc_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
define <vscale x 1 x i64> @trunc_nxv1f32_to_si64(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI6_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI6_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -234,30 +182,14 @@ define <vscale x 1 x i64> @trunc_nxv1f32_to_si64(<vscale x 1 x float> %x) {
define <vscale x 1 x i64> @trunc_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
; RV32-LABEL: trunc_nxv1f32_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI7_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f32_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI7_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -353,32 +285,14 @@ define <vscale x 4 x i8> @trunc_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
define <vscale x 4 x i16> @trunc_nxv4f32_to_si16(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI10_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
@@ -390,32 +304,14 @@ define <vscale x 4 x i16> @trunc_nxv4f32_to_si16(<vscale x 4 x float> %x) {
define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI11_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI11_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI11_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI11_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
@@ -461,30 +357,14 @@ define <vscale x 4 x i32> @trunc_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI14_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8
; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI14_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI14_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8
; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
@@ -496,30 +376,14 @@ define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @trunc_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: trunc_nxv4f32_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI15_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8
; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f32_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI15_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI15_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8
; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
@@ -623,37 +487,21 @@ define <vscale x 1 x i8> @ceil_nxv1f32_to_ui8(<vscale x 1 x float> %x) {
define <vscale x 1 x i16> @ceil_nxv1f32_to_si16(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI18_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI18_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI18_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI18_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -664,37 +512,21 @@ define <vscale x 1 x i16> @ceil_nxv1f32_to_si16(<vscale x 1 x float> %x) {
define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI19_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI19_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI19_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI19_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -751,35 +583,21 @@ define <vscale x 1 x i32> @ceil_nxv1f32_to_ui32(<vscale x 1 x float> %x) {
define <vscale x 1 x i64> @ceil_nxv1f32_to_si64(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI22_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI22_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI22_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI22_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -790,35 +608,21 @@ define <vscale x 1 x i64> @ceil_nxv1f32_to_si64(<vscale x 1 x float> %x) {
define <vscale x 1 x i64> @ceil_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
; RV32-LABEL: ceil_nxv1f32_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI23_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI23_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f32_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI23_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI23_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -921,37 +725,21 @@ define <vscale x 4 x i8> @ceil_nxv4f32_to_ui8(<vscale x 4 x float> %x) {
define <vscale x 4 x i16> @ceil_nxv4f32_to_si16(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI26_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI26_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v10, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_si16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI26_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI26_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v10, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -962,37 +750,21 @@ define <vscale x 4 x i16> @ceil_nxv4f32_to_si16(<vscale x 4 x float> %x) {
define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui16:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI27_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI27_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v10, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui16:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI27_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI27_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v10, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -1049,35 +821,21 @@ define <vscale x 4 x i32> @ceil_nxv4f32_to_ui32(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_si64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI30_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI30_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v12, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8
; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_si64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI30_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI30_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8
; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -1088,35 +846,21 @@ define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
define <vscale x 4 x i64> @ceil_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
; RV32-LABEL: ceil_nxv4f32_to_ui64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI31_0)
-; RV32-NEXT: flw ft0, %lo(.LCPI31_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vfabs.v v10, v8
-; RV32-NEXT: vmflt.vf v0, v10, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v12, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8
; RV32-NEXT: vmv4r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f32_to_ui64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI31_0)
-; RV64-NEXT: flw ft0, %lo(.LCPI31_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vfabs.v v10, v8
-; RV64-NEXT: vmflt.vf v0, v10, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v12, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8
; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
index 8a249b4272a81..e291ebf3789bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -13,32 +13,14 @@ declare <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half>)
define <vscale x 1 x i8> @trunc_nxv1f16_to_si8(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_si8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI0_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_si8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI0_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -50,32 +32,14 @@ define <vscale x 1 x i8> @trunc_nxv1f16_to_si8(<vscale x 1 x half> %x) {
define <vscale x 1 x i8> @trunc_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_ui8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI1_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_ui8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI1_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -121,30 +85,14 @@ define <vscale x 1 x i16> @trunc_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
define <vscale x 1 x i32> @trunc_nxv1f16_to_si32(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI4_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -156,30 +104,14 @@ define <vscale x 1 x i32> @trunc_nxv1f16_to_si32(<vscale x 1 x half> %x) {
define <vscale x 1 x i32> @trunc_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
; RV32-LABEL: trunc_nxv1f16_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv1f16_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI5_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -271,32 +203,14 @@ declare <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half>)
define <vscale x 4 x i8> @trunc_nxv4f16_to_si8(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_si8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI8_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI8_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_si8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI8_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI8_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -308,32 +222,14 @@ define <vscale x 4 x i8> @trunc_nxv4f16_to_si8(<vscale x 4 x half> %x) {
define <vscale x 4 x i8> @trunc_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_ui8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI9_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI9_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_ui8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI9_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI9_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
@@ -379,30 +275,14 @@ define <vscale x 4 x i16> @trunc_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @trunc_nxv4f16_to_si32(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI12_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI12_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.x.f.v v10, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI12_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI12_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.x.f.v v10, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: ret
@@ -414,30 +294,14 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_si32(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @trunc_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
; RV32-LABEL: trunc_nxv4f16_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI13_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
-; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV32-NEXT: vfwcvt.rtz.xu.f.v v10, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: trunc_nxv4f16_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI13_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
-; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; RV64-NEXT: vfwcvt.rtz.xu.f.v v10, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: ret
@@ -529,37 +393,21 @@ declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
define <vscale x 1 x i8> @ceil_nxv1f16_to_si8(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_si8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI16_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI16_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_si8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI16_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI16_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -570,37 +418,21 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_si8(<vscale x 1 x half> %x) {
define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_ui8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI17_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI17_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.xu.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_ui8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI17_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI17_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.xu.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -657,35 +489,21 @@ define <vscale x 1 x i16> @ceil_nxv1f16_to_ui16(<vscale x 1 x half> %x) {
define <vscale x 1 x i32> @ceil_nxv1f16_to_si32(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI20_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI20_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI20_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI20_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.x.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -696,35 +514,21 @@ define <vscale x 1 x i32> @ceil_nxv1f16_to_si32(<vscale x 1 x half> %x) {
define <vscale x 1 x i32> @ceil_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
; RV32-LABEL: ceil_nxv1f16_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI21_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI21_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv1f16_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI21_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI21_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.xu.f.v v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -823,37 +627,21 @@ declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
define <vscale x 4 x i8> @ceil_nxv4f16_to_si8(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_si8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI24_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI24_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_si8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI24_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI24_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -864,37 +652,21 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_si8(<vscale x 4 x half> %x) {
define <vscale x 4 x i8> @ceil_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_ui8:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI25_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI25_0)(a0)
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_ui8:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI25_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI25_0)(a0)
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfncvt.x.f.w v9, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -951,35 +723,21 @@ define <vscale x 4 x i16> @ceil_nxv4f16_to_ui16(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_si32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI28_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI28_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.x.f.v v10, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_si32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI28_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI28_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.x.f.v v10, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -990,35 +748,21 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
define <vscale x 4 x i32> @ceil_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
; RV32-LABEL: ceil_nxv4f16_to_ui32:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI29_0)
-; RV32-NEXT: flh ft0, %lo(.LCPI29_0)(a0)
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vfabs.v v9, v8
-; RV32-NEXT: vmflt.vf v0, v9, ft0
+; RV32-NEXT: vmset.m v0
; RV32-NEXT: fsrmi a0, 3
-; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t
; RV32-NEXT: fsrm a0
-; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT: vfwcvt.rtz.xu.f.v v10, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: ceil_nxv4f16_to_ui32:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI29_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI29_0)(a0)
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vfabs.v v9, v8
-; RV64-NEXT: vmflt.vf v0, v9, ft0
+; RV64-NEXT: vmset.m v0
; RV64-NEXT: fsrmi a0, 3
-; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t
; RV64-NEXT: fsrm a0
-; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT: vfwcvt.rtz.xu.f.v v10, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: ret
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)