[llvm] 9620ce9 - [RISCV] Support fixed-length vector FP_ROUND & FP_EXTEND
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 25 04:22:16 PST 2021
Author: Fraser Cormack
Date: 2021-02-25T12:16:06Z
New Revision: 9620ce90d7238c5ff450a83b49cbc4b811d19830
URL: https://github.com/llvm/llvm-project/commit/9620ce90d7238c5ff450a83b49cbc4b811d19830
DIFF: https://github.com/llvm/llvm-project/commit/9620ce90d7238c5ff450a83b49cbc4b811d19830.diff
LOG: [RISCV] Support fixed-length vector FP_ROUND & FP_EXTEND
This patch extends the support for vector FP_ROUND and FP_EXTEND to
fixed-length vector types. Since fixed-length vectors are lowered via
"VL" nodes while scalable vectors can use the standard nodes, there is
slightly more to do in the fixed-length case. A helper function was
introduced to try to reduce the divergent paths. It is expected that
this function will prove similarly useful when lowering the int-to-fp
and fp-to-int operations for fixed-length vectors.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D97301
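As a minimal sketch of the cases this enables (the function names below are
illustrative, not taken from the new test file), a fixed-length vXf16->vXf64
fpext now lowers through FP_EXTEND_VL with an intermediate hop via f32, and a
vXf64->vXf16 fptrunc lowers through the round-to-odd narrowing node
(VFNCVT_ROD_VL) followed by FP_ROUND_VL:

define <2 x double> @ext_v2f16_v2f64(<2 x half> %v) {
  ; f16 -> f64 widens in two hops: f16 -> f32 -> f64, using FP_EXTEND_VL.
  %e = fpext <2 x half> %v to <2 x double>
  ret <2 x double> %e
}

define <2 x half> @trunc_v2f64_v2f16(<2 x double> %v) {
  ; f64 -> f16 narrows via round-to-odd to f32, then FP_ROUND_VL to f16.
  %t = fptrunc <2 x double> %v to <2 x half>
  ret <2 x half> %t
}

The concrete codegen for these shapes is checked by the added test file
fixed-vectors-fp-conv.ll below.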
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f18966706639..69610218786d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -588,6 +588,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
+ for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+ setTruncStoreAction(VT, OtherVT, Expand);
+ }
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -606,6 +610,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
@@ -1081,6 +1088,21 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
+ SDLoc DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (VT.isScalableVector())
+ return DAG.getFPExtendOrRound(Op, DL, VT);
+ assert(VT.isFixedLengthVector() &&
+ "Unexpected value type for RVV FP extend/round lowering");
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
+ ? RISCVISD::FP_EXTEND_VL
+ : RISCVISD::FP_ROUND_VL;
+ return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1254,33 +1276,86 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// RVV can only do fp_extend to types double the size as the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
// via f32.
+ SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- MVT SrcVT = Op.getOperand(0).getSimpleValueType();
- // We only need to close the gap between vXf16->vXf64.
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ // Prepare any fixed-length vector operands.
+ MVT ContainerVT = VT;
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+ DAG, VT, Subtarget);
+ MVT SrcContainerVT =
+ ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ }
+
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
- SrcVT.getVectorElementType() != MVT::f16)
- return Op;
- SDLoc DL(Op);
- MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- SDValue IntermediateRound =
- DAG.getFPExtendOrRound(Op.getOperand(0), DL, InterVT);
- return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
+ SrcVT.getVectorElementType() != MVT::f16) {
+ // For scalable vectors, we only need to close the gap between
+ // vXf16->vXf64.
+ if (!VT.isFixedLengthVector())
+ return Op;
+ // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
+ Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
+ return convertFromScalableVector(VT, Src, DAG, Subtarget);
+ }
+
+ MVT InterVT = VT.changeVectorElementType(MVT::f32);
+ MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
+ SDValue IntermediateExtend = getRVVFPExtendOrRound(
+ Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
+
+ SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
+ DL, DAG, Subtarget);
+ if (VT.isFixedLengthVector())
+ return convertFromScalableVector(VT, Extend, DAG, Subtarget);
+ return Extend;
}
case ISD::FP_ROUND: {
// RVV can only do fp_round to types half the size as the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction.
+ SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- MVT SrcVT = Op.getOperand(0).getSimpleValueType();
- // We only need to close the gap between vXf64<->vXf16.
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ // Prepare any fixed-length vector operands.
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ MVT SrcContainerVT =
+ RISCVTargetLowering::getContainerForFixedLengthVector(DAG, SrcVT,
+ Subtarget);
+ ContainerVT =
+ SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ }
+
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
- SrcVT.getVectorElementType() != MVT::f64)
- return Op;
- SDLoc DL(Op);
- MVT InterVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ SrcVT.getVectorElementType() != MVT::f64) {
+ // For scalable vectors, we only need to close the gap between
+ // vXf64<->vXf16.
+ if (!VT.isFixedLengthVector())
+ return Op;
+ // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
+ Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
+ return convertFromScalableVector(VT, Src, DAG, Subtarget);
+ }
+
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
SDValue IntermediateRound =
- DAG.getNode(RISCVISD::VFNCVT_ROD, DL, InterVT, Op.getOperand(0));
- return DAG.getFPExtendOrRound(IntermediateRound, DL, VT);
+ DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
+ SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
+ DL, DAG, Subtarget);
+
+ if (VT.isFixedLengthVector())
+ return convertFromScalableVector(VT, Round, DAG, Subtarget);
+ return Round;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -5460,7 +5535,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSLIDEUP_VL)
NODE_NAME_CASE(VSLIDEDOWN_VL)
NODE_NAME_CASE(VID_VL)
- NODE_NAME_CASE(VFNCVT_ROD)
+ NODE_NAME_CASE(VFNCVT_ROD_VL)
NODE_NAME_CASE(VECREDUCE_ADD)
NODE_NAME_CASE(VECREDUCE_UMAX)
NODE_NAME_CASE(VECREDUCE_SMAX)
@@ -5498,6 +5573,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UMAX_VL)
NODE_NAME_CASE(MULHS_VL)
NODE_NAME_CASE(MULHU_VL)
+ NODE_NAME_CASE(FP_ROUND_VL)
+ NODE_NAME_CASE(FP_EXTEND_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VMAND_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index a75ebc38cc2e..606d171dbb59 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -122,8 +122,9 @@ enum NodeType : unsigned {
VID_VL,
// Matches the semantics of the vfcnvt.rod function (Convert double-width
// float to single-width float, rounding towards odd). Takes a double-width
- // float vector and produces a single-width float vector.
- VFNCVT_ROD,
+ // float vector and produces a single-width float vector. Also has a mask and
+ // VL operand.
+ VFNCVT_ROD_VL,
// These nodes match the semantics of the corresponding RVV vector reduction
// instructions. They produce a vector result which is the reduction
// performed over the first vector operand plus the first element of the
@@ -175,6 +176,8 @@ enum NodeType : unsigned {
UMAX_VL,
MULHS_VL,
MULHU_VL,
+ FP_ROUND_VL,
+ FP_EXTEND_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
// operand is VL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index c552865c6ec9..a988a0ed2aaf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -538,10 +538,6 @@ foreach mti = AllMasks in {
} // Predicates = [HasStdExtV]
-def riscv_fncvt_rod
- : SDNode<"RISCVISD::VFNCVT_ROD",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
-
let Predicates = [HasStdExtV, HasStdExtF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
@@ -719,12 +715,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
-
- def : Pat<(fvti.Vector (riscv_fncvt_rod (fwti.Vector fwti.RegClass:$rs1))),
- (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
- fwti.RegClass:$rs1, fvti.AVL, fvti.SEW)>;
}
-
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 2d5f8fa447fc..e1543cabf833 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -96,6 +96,20 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisVT<5, XLenVT>]>;
def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def SDT_RISCVFPRoundOp_VL : SDTypeProfile<1, 3, [
+ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
+]>;
+def SDT_RISCVFPExtendOp_VL : SDTypeProfile<1, 3, [
+ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
+]>;
+
+def riscv_fpround_vl : SDNode<"RISCVISD::FP_ROUND_VL", SDT_RISCVFPRoundOp_VL>;
+def riscv_fpextend_vl : SDNode<"RISCVISD::FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL>;
+def riscv_fncvt_rod_vl : SDNode<"RISCVISD::VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL>;
+
+
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>,
SDTCisVec<1>,
@@ -740,6 +754,33 @@ foreach fvti = AllFloatVectors in {
fvti.LMul.MX)
(fvti.Scalar fvti.ScalarRegClass:$rs2),
GPR:$vl, fvti.SEW)>;
+
+ // 14.18. Widening Floating-Point/Integer Type-Convert Instructions
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ def : Pat<(fwti.Vector (riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1, GPR:$vl, fvti.SEW)>;
+ }
+
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ def : Pat<(fvti.Vector (riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
+ fwti.RegClass:$rs1, GPR:$vl, fvti.SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
+ fwti.RegClass:$rs1, GPR:$vl, fvti.SEW)>;
+ }
}
} // Predicates = [HasStdExtV, HasStdExtF]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
new file mode 100644
index 000000000000..db2628822816
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @fpext_v2f16_v2f32(<2 x half>* %x, <2 x float>* %y) {
+; CHECK-LABEL: fpext_v2f16_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vse32.v v26, (a1)
+; CHECK-NEXT: ret
+ %a = load <2 x half>, <2 x half>* %x
+ %d = fpext <2 x half> %a to <2 x float>
+ store <2 x float> %d, <2 x float>* %y
+ ret void
+}
+
+define void @fpext_v2f16_v2f64(<2 x half>* %x, <2 x double>* %y) {
+; CHECK-LABEL: fpext_v2f16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v25, v26
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vse64.v v25, (a1)
+; CHECK-NEXT: ret
+ %a = load <2 x half>, <2 x half>* %x
+ %d = fpext <2 x half> %a to <2 x double>
+ store <2 x double> %d, <2 x double>* %y
+ ret void
+}
+
+define void @fpext_v8f16_v8f32(<8 x half>* %x, <8 x float>* %y) {
+; LMULMAX8-LABEL: fpext_v8f16_v8f32:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX8-NEXT: vle16.v v25, (a0)
+; LMULMAX8-NEXT: vfwcvt.f.f.v v26, v25
+; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX8-NEXT: vse32.v v26, (a1)
+; LMULMAX8-NEXT: ret
+;
+; LMULMAX1-LABEL: fpext_v8f16_v8f32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: addi a0, a1, 16
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vse32.v v27, (a0)
+; LMULMAX1-NEXT: vse32.v v26, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <8 x half>, <8 x half>* %x
+ %d = fpext <8 x half> %a to <8 x float>
+ store <8 x float> %d, <8 x float>* %y
+ ret void
+}
+
+define void @fpext_v8f16_v8f64(<8 x half>* %x, <8 x double>* %y) {
+; LMULMAX8-LABEL: fpext_v8f16_v8f64:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX8-NEXT: vle16.v v25, (a0)
+; LMULMAX8-NEXT: vfwcvt.f.f.v v26, v25
+; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX8-NEXT: vfwcvt.f.f.v v28, v26
+; LMULMAX8-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX8-NEXT: vse64.v v28, (a1)
+; LMULMAX8-NEXT: ret
+;
+; LMULMAX1-LABEL: fpext_v8f16_v8f64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v29, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v25, v27
+; LMULMAX1-NEXT: addi a0, a1, 48
+; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a1, 32
+; LMULMAX1-NEXT: vse64.v v29, (a0)
+; LMULMAX1-NEXT: vse64.v v28, (a1)
+; LMULMAX1-NEXT: addi a0, a1, 16
+; LMULMAX1-NEXT: vse64.v v26, (a0)
+; LMULMAX1-NEXT: ret
+ %a = load <8 x half>, <8 x half>* %x
+ %d = fpext <8 x half> %a to <8 x double>
+ store <8 x double> %d, <8 x double>* %y
+ ret void
+}
+
+define void @fpround_v2f32_v2f16(<2 x float>* %x, <2 x half>* %y) {
+; CHECK-LABEL: fpround_v2f32_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v25
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v26, (a1)
+; CHECK-NEXT: ret
+ %a = load <2 x float>, <2 x float>* %x
+ %d = fptrunc <2 x float> %a to <2 x half>
+ store <2 x half> %d, <2 x half>* %y
+ ret void
+}
+
+define void @fpround_v2f64_v2f16(<2 x double>* %x, <2 x half>* %y) {
+; CHECK-LABEL: fpround_v2f64_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT: vle64.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v26, v25
+; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v26
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v25, (a1)
+; CHECK-NEXT: ret
+ %a = load <2 x double>, <2 x double>* %x
+ %d = fptrunc <2 x double> %a to <2 x half>
+ store <2 x half> %d, <2 x half>* %y
+ ret void
+}
+
+define void @fpround_v8f32_v8f16(<8 x float>* %x, <8 x half>* %y) {
+; LMULMAX8-LABEL: fpround_v8f32_v8f16:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX8-NEXT: vle32.v v26, (a0)
+; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX8-NEXT: vfncvt.f.f.w v25, v26
+; LMULMAX8-NEXT: vse16.v v25, (a1)
+; LMULMAX8-NEXT: ret
+;
+; LMULMAX1-LABEL: fpround_v8f32_v8f16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi sp, sp, -16
+; LMULMAX1-NEXT: .cfi_def_cfa_offset 16
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: addi a2, a0, 16
+; LMULMAX1-NEXT: vle32.v v25, (a2)
+; LMULMAX1-NEXT: vle32.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v27, v25
+; LMULMAX1-NEXT: addi a0, sp, 8
+; LMULMAX1-NEXT: vsetivli a2, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v27, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v25, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v25, (sp)
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (sp)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: addi sp, sp, 16
+; LMULMAX1-NEXT: ret
+ %a = load <8 x float>, <8 x float>* %x
+ %d = fptrunc <8 x float> %a to <8 x half>
+ store <8 x half> %d, <8 x half>* %y
+ ret void
+}
+
+define void @fpround_v8f64_v8f16(<8 x double>* %x, <8 x half>* %y) {
+; LMULMAX8-LABEL: fpround_v8f64_v8f16:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: vsetivli a2, 8, e64,m4,ta,mu
+; LMULMAX8-NEXT: vle64.v v28, (a0)
+; LMULMAX8-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX8-NEXT: vfncvt.rod.f.f.w v26, v28
+; LMULMAX8-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX8-NEXT: vfncvt.f.f.w v25, v26
+; LMULMAX8-NEXT: vse16.v v25, (a1)
+; LMULMAX8-NEXT: ret
+;
+; LMULMAX1-LABEL: fpround_v8f64_v8f16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi sp, sp, -32
+; LMULMAX1-NEXT: .cfi_def_cfa_offset 32
+; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vle64.v v25, (a0)
+; LMULMAX1-NEXT: addi a2, a0, 32
+; LMULMAX1-NEXT: vle64.v v26, (a2)
+; LMULMAX1-NEXT: addi a2, a0, 48
+; LMULMAX1-NEXT: vle64.v v27, (a2)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle64.v v28, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v29, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v27, v29
+; LMULMAX1-NEXT: addi a0, sp, 12
+; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v27, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v27, v28
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v28, v27
+; LMULMAX1-NEXT: addi a0, sp, 4
+; LMULMAX1-NEXT: vsetivli a2, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v28, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v27, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v26, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a0, sp, 8
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a0, sp, 8
+; LMULMAX1-NEXT: vle16.v v26, (a0)
+; LMULMAX1-NEXT: addi a0, sp, 24
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v26, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfncvt.f.f.w v25, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v25, (sp)
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (sp)
+; LMULMAX1-NEXT: addi a0, sp, 16
+; LMULMAX1-NEXT: vse16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a0, sp, 16
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: addi sp, sp, 32
+; LMULMAX1-NEXT: ret
+ %a = load <8 x double>, <8 x double>* %x
+ %d = fptrunc <8 x double> %a to <8 x half>
+ store <8 x half> %d, <8 x half>* %y
+ ret void
+}