[llvm] AMDGPU: Custom lower fptrunc vectors for f32 -> f16 (PR #141883)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 22:47:13 PDT 2025
================
@@ -6900,14 +6902,44 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG, SDValue Op,
DAG.getTargetConstant(0, DL, MVT::i32));
}
+SDValue SITargetLowering::SplitFP_ROUNDVectorToPacks(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ EVT DstVT = Op.getValueType();
+ unsigned NumElts = DstVT.getVectorNumElements();
+ assert(NumElts % 2 == 0 && "Only handle vectors of even number of elements");
+ if (NumElts == 2) // already packed.
+ return Op;
+
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ LLVMContext &Context = *DAG.getContext();
+ EVT SrcPkVT = EVT::getVectorVT(Context, SrcVT.getScalarType(), 2);
+ EVT DstPkVT = EVT::getVectorVT(Context, DstVT.getScalarType(), 2);
+
+ SDLoc DL(Op);
+ SmallVector<SDValue, 16> Packs;
+ for (unsigned Index = 0; Index < NumElts; Index += 2) {
+ SDValue PkSrc = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SrcPkVT, Src,
+ DAG.getConstant(Index, DL, MVT::i32));
+ SDValue PkDst = DAG.getNode(Opc, DL, DstPkVT, PkSrc,
+ DAG.getTargetConstant(0, DL, MVT::i32));
----------------
arsenm wrote:
This should preserve the original value of the round flag (operand 1 of Op) instead of hard-coding 0.
https://github.com/llvm/llvm-project/pull/141883
More information about the llvm-commits mailing list