[llvm-branch-commits] [llvm] [LoongArch] Add support for vector FP_ROUND from vxf64 to vxf32 (PR #164059)
Zhaoxin Yang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Oct 18 00:14:01 PDT 2025
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/164059
In LoongArch, the [x]vfcvt.s.d instructions require two vector registers for v4f64->v4f32 and v8f64->v8f32 conversions.
This patch handles the following cases (see the IR sketch after the list):
- For FP_ROUND v2f64->v2f32 (illegal), add a custom v2f32 widening that converts it into a target-specific LoongArchISD::VFCVT.
- For FP_ROUND v4f64->v4f32, on LSX platforms, v4f64 is illegal, so the node is split into two v2f64->v2f32 conversions, yielding two LoongArchISD::VFCVT nodes; these are merged into a single node when the LoongArchISD::VPACKEV that packs them is combined. On LASX platforms, v4f64->v4f32 lowers directly to vfcvt.s.d in lowerFP_ROUND.
- For FP_ROUND v8f64->v8f32, on LASX platforms, v8f64 is illegal, so the node is split into two v4f64->v4f32 conversions rejoined with ISD::CONCAT_VECTORS; a single xvfcvt.s.d is generated when that ISD::CONCAT_VECTORS is combined.
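As a minimal IR sketch of the three conversion shapes this patch targets (function names here are invented for illustration; the actual coverage is in the fptrunc.ll tests updated below):

define <2 x float> @trunc_v2f64(<2 x double> %x) {
  ; LSX: result widened to v4f32 and emitted as a single vfcvt.s.d
  %r = fptrunc <2 x double> %x to <2 x float>
  ret <2 x float> %r
}

define <4 x float> @trunc_v4f64(<4 x double> %x) {
  ; LSX: two VFCVT halves merged at the VPACKEV combine
  ; LASX: lowered directly in lowerFP_ROUND
  %r = fptrunc <4 x double> %x to <4 x float>
  ret <4 x float> %r
}

define <8 x float> @trunc_v8f64(<8 x double> %x) {
  ; LASX: split, then one xvfcvt.s.d from the CONCAT_VECTORS combine
  %r = fptrunc <8 x double> %x to <8 x float>
  ret <8 x float> %r
}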
From 92bcc4b7aea5542b6d758b71b1506c2fe62256c0 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Fri, 17 Oct 2025 17:59:17 +0800
Subject: [PATCH] [LoongArch] Add support for vector FP_ROUND from vxf64 to
vxf32
In LoongArch, the [x]vfcvt.s.d instructions require two vector registers
for v4f64->v4f32 and v8f64->v8f32 conversions.
This patch handles these cases:
- For FP_ROUND v2f64->v2f32 (illegal), add a custom v2f32 widening
that converts it into a target-specific LoongArchISD::VFCVT.
- For FP_ROUND v4f64->v4f32, on LSX platforms, v4f64 is illegal, so the
node is split into two v2f64->v2f32 conversions, yielding two
LoongArchISD::VFCVT nodes; these are merged into a single node when
the LoongArchISD::VPACKEV that packs them is combined. On LASX
platforms, v4f64->v4f32 lowers directly to vfcvt.s.d in lowerFP_ROUND.
- For FP_ROUND v8f64->v8f32, on LASX platforms, v8f64 is illegal, so
the node is split into two v4f64->v4f32 conversions rejoined with
ISD::CONCAT_VECTORS; a single xvfcvt.s.d is generated when that
ISD::CONCAT_VECTORS is combined.
---
.../LoongArch/LoongArchISelLowering.cpp | 134 +++++++++++++++++-
.../Target/LoongArch/LoongArchISelLowering.h | 3 +
.../LoongArch/LoongArchLASXInstrInfo.td | 4 +
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 7 +
.../LoongArch/lasx/ir-instruction/fptrunc.ll | 48 ++-----
.../LoongArch/lsx/ir-instruction/fptrunc.ll | 27 +---
6 files changed, 162 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeafc9ccfc..e6f3018a2205c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -386,6 +386,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
}
// Set operations for 'LASX' feature.
@@ -448,6 +449,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
}
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Custom);
}
// Set DAG combine for LA32 and LA64.
@@ -466,8 +468,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
// Set DAG combine for 'LASX' feature.
- if (Subtarget.hasExtLASX())
+ if (Subtarget.hasExtLASX()) {
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ }
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -592,7 +596,100 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
+ case ISD::FP_ROUND:
+ return lowerFP_ROUND(Op, DAG);
+ }
+ return SDValue();
+}
+
+// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with the same type
+// into a single LoongArchISD::VFCVT. For example:
+// x1 = fp_round x, 0
+// y1 = fp_round y, 0
+// z = concat_vectors x1, y1
+// or
+// x1 = LoongArchISD::VFCVT undef, x
+// y1 = LoongArchISD::VFCVT undef, y
+// z = LoongArchISD::VPACKEV y1, x1
+// can be combined to:
+// z = LoongArchISD::VFCVT y, x
+static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
+ (N->getOpcode() == LoongArchISD::VPACKEV)) &&
+ "Invalid Node");
+
+ SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
+ unsigned Opcode0 = Op0.getOpcode();
+ unsigned Opcode1 = Op1.getOpcode();
+ if (Opcode0 != Opcode1)
+ return SDValue();
+
+ if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
+ return SDValue();
+
+  // Check that both nodes have exactly one use.
+ if (!Op0.hasOneUse() || !Op1.hasOneUse())
+ return SDValue();
+
+ EVT VT = N.getValueType();
+ EVT SVT0 = Op0.getValueType();
+ EVT SVT1 = Op1.getValueType();
+  // Check that both nodes have the same result type.
+ if (SVT0 != SVT1)
+ return SDValue();
+
+  // Check that both nodes have the same operand type.
+ EVT SSVT0 = Op0.getOperand(0).getValueType();
+ EVT SSVT1 = Op1.getOperand(0).getValueType();
+ if (SSVT0 != SSVT1)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
+ if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
+ SSVT0 == MVT::v4f64) {
+      // A vector_shuffle is required in the final step, as the xvfcvt
+      // instruction operates on each 128-bit segment as a separate lane.
+ SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
+ Op1.getOperand(0), Op0.getOperand(0));
+ SDValue Undef = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
+ Res = DAG.getVectorShuffle(VT, DL, Res, Undef, Mask);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ if (N->getOpcode() == LoongArchISD::VPACKEV &&
+ Opcode0 == LoongArchISD::VFCVT) {
+    // For VPACKEV, check that each LoongArchISD::VFCVT's first operand is undef.
+ if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
+ return SDValue();
+
+ if (Subtarget.hasExtLSX() && (VT == MVT::v2i64 || VT == MVT::v2f64) &&
+ SVT0 == MVT::v4f32 && SSVT0 == MVT::v2f64) {
+ SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
+ Op0.getOperand(1), Op1.getOperand(1));
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue In = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+ MVT SVT = In.getSimpleValueType();
+
+ if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
+ return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
}
+
return SDValue();
}
@@ -4720,6 +4817,20 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
break;
}
+ case ISD::FP_ROUND: {
+ assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
+ "Unexpected custom legalisation");
+    // On LSX platforms, FP_ROUND from v2f64 to v2f32 would be scalarized by
+    // default. Instead, widen the v2f32 result to v4f32 and convert it into
+    // a target-specific LoongArchISD::VFCVT.
+ if (VT == MVT::v2f32) {
+ SDValue Src = N->getOperand(0);
+ SDValue Undef = DAG.getUNDEF(Src.getValueType());
+ SDValue Dst = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Src);
+ Results.push_back(Dst);
+ }
+ break;
+ }
case ISD::BSWAP: {
SDValue Src = N->getOperand(0);
assert((VT == MVT::i16 || VT == MVT::i32) &&
@@ -6679,6 +6790,20 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue
+performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() && N->getNumOperands() == 2)
+ if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
+ return R;
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6714,6 +6839,12 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case ISD::CONCAT_VECTORS:
+ return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::VPACKEV:
+ if (SDValue Result =
+ combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
+ return Result;
}
return SDValue();
}
@@ -7512,6 +7643,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
+ NODE_NAME_CASE(VFCVT)
NODE_NAME_CASE(VSLLI)
NODE_NAME_CASE(VSRLI)
NODE_NAME_CASE(VBSLL)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..b69e6240cab61 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -167,6 +167,8 @@ enum NodeType : unsigned {
FRECIPE,
FRSQRTE,
+ VFCVT,
+
// Vector logicial left / right shift by immediate
VSLLI,
VSRLI,
@@ -415,6 +417,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 5143d53bad719..0a170de40f834 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2403,6 +2403,10 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+// Vector floating-point conversion
+def : Pat<(v8f32 (loongarch_vfcvt_s_d (v4f64 LASX256:$xj), (v4f64 LASX256:$xk))),
+ (XVFCVT_S_D LASX256:$xj, LASX256:$xk)>;
+
// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 8d1dc99e316c9..844d391b49c3f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -32,6 +32,8 @@ def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTC
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_LoongArchVFCVT_S_D : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisVec<1>, SDTCisFP<1>, SDTCisSameAs<1, 2>]>;
// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -82,6 +84,8 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;
+def loongarch_vfcvt_s_d: SDNode<"LoongArchISD::VFCVT", SDT_LoongArchVFCVT_S_D>;
+
def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
@@ -2519,6 +2523,9 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+def : Pat<(v4f32 (loongarch_vfcvt_s_d (v2f64 LSX128:$vj), (v2f64 LSX128:$vk))),
+ (VFCVT_S_D LSX128:$vj, LSX128:$vk)>;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptrunc.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptrunc.ll
index 6ade53d9ef531..7c6345b7c0057 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptrunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptrunc.ll
@@ -7,18 +7,9 @@ define void @fptrunc_v4f64_to_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fptrunc_v4f64_to_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
-; CHECK-NEXT: fcvt.s.d $fa1, $fa1
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 0
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 2
-; CHECK-NEXT: fcvt.s.d $fa1, $fa1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vfcvt.s.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
@@ -30,32 +21,13 @@ entry:
define void @fptrunc_v8f64_to_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fptrunc_v8f64_to_v8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 32
-; CHECK-NEXT: xvld $xr1, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: xvpickve.d $xr3, $xr0, 0
-; CHECK-NEXT: fcvt.s.d $fa3, $fa3
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: xvpickve.d $xr0, $xr1, 1
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: xvpickve.d $xr2, $xr1, 0
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 16
-; CHECK-NEXT: xvpickve.d $xr0, $xr1, 2
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 32
-; CHECK-NEXT: xvpickve.d $xr0, $xr1, 3
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr2, $xr3, 2
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a1, 32
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x double>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptrunc.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptrunc.ll
index acd487a889c4e..e4f2f0906743d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptrunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptrunc.ll
@@ -21,22 +21,15 @@ define void @fptrunc_v2f64_to_v2f32(ptr %res, ptr %a0) nounwind {
; LA32-LABEL: fptrunc_v2f64_to_v2f32:
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vreplvei.d $vr1, $vr0, 0
-; LA32-NEXT: fcvt.s.d $fa1, $fa1
-; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
-; LA32-NEXT: fcvt.s.d $fa0, $fa0
-; LA32-NEXT: fst.s $fa0, $a0, 4
-; LA32-NEXT: fst.s $fa1, $a0, 0
+; LA32-NEXT: vfcvt.s.d $vr0, $vr0, $vr0
+; LA32-NEXT: vstelm.w $vr0, $a0, 4, 1
+; LA32-NEXT: vstelm.w $vr0, $a0, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: fptrunc_v2f64_to_v2f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vreplvei.d $vr1, $vr0, 1
-; LA64-NEXT: fcvt.s.d $fa1, $fa1
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA64-NEXT: fcvt.s.d $fa0, $fa0
-; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: vfcvt.s.d $vr0, $vr0, $vr0
; LA64-NEXT: vstelm.d $vr0, $a0, 0, 0
; LA64-NEXT: ret
entry:
@@ -51,17 +44,7 @@ define void @fptrunc_v4f64_to_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a1, 16
-; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: fcvt.s.d $fa0, $fa0
-; CHECK-NEXT: vextrins.w $vr0, $vr2, 16
-; CHECK-NEXT: vreplvei.d $vr2, $vr1, 0
-; CHECK-NEXT: fcvt.s.d $fa2, $fa2
-; CHECK-NEXT: vextrins.w $vr0, $vr2, 32
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 1
-; CHECK-NEXT: fcvt.s.d $fa1, $fa1
-; CHECK-NEXT: vextrins.w $vr0, $vr1, 48
+; CHECK-NEXT: vfcvt.s.d $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry: