[llvm] f01f082 - AMDGPU: Make v2f32 -> v2f16 legal when target supports v_cvt_pk_f16_f32 (#139956)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 15:09:25 PDT 2025
Author: Changpeng Fang
Date: 2025-05-15T15:09:21-07:00
New Revision: f01f08292e894a565baa1b2741b31fbcf53a18cb
URL: https://github.com/llvm/llvm-project/commit/f01f08292e894a565baa1b2741b31fbcf53a18cb
DIFF: https://github.com/llvm/llvm-project/commit/f01f08292e894a565baa1b2741b31fbcf53a18cb.diff
LOG: AMDGPU: Make v2f32 -> v2f16 legal when target supports v_cvt_pk_f16_f32 (#139956)
If the target supports the v_cvt_pk_f16_f32 instruction, v2f32 -> v2f16 should
be legal. However, SelectionDAG does not allow us to specify the source
type in the legalization rules. To work around this, we make FP_ROUND
Custom for v2f16 and then treat v2f32 -> v2f16 as legal during custom
lowering.
Fixes: SWDEV-532608 -- expected v_cvt_pk_f16_f32 was not generated.
Added:
llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5cd6561914364..70f9485c3e5b4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -919,6 +919,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
}
+ if (Subtarget->hasCvtPkF16F32Inst())
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+
setTargetDAGCombine({ISD::ADD,
ISD::UADDO_CARRY,
ISD::SUB,
@@ -6899,10 +6902,16 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG, SDValue Op,
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+
+ if (DstVT == MVT::v2f16) {
+ assert(Subtarget->hasCvtPkF16F32Inst() && "support v_cvt_pk_f16_f32");
+ return SrcVT == MVT::v2f32 ? Op : SDValue();
+ }
+
if (SrcVT.getScalarType() != MVT::f64)
return Op;
- EVT DstVT = Op.getValueType();
SDLoc DL(Op);
if (DstVT == MVT::f16) {
// TODO: Handle strictfp
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
similarity index 93%
rename from llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll
rename to llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
index 5a5e39489d888..e5815e96fbe33 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
@@ -12,6 +12,20 @@ define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) {
ret <2 x half> %res
}
+define half @fptrunc_v2f32_v2f16_then_extract(<2 x float> %src) {
+; GFX950-LABEL: fptrunc_v2f32_v2f16_then_extract:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
+; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+ %vec_half = fptrunc <2 x float> %src to <2 x half>
+ %first = extractelement <2 x half> %vec_half, i64 1
+ %second = extractelement <2 x half> %vec_half, i64 0
+ %res = fadd half %first, %second
+ ret half %res
+}
+
define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
; GFX950-SDAG-LABEL: v_test_cvt_v2f64_v2f16:
; GFX950-SDAG: ; %bb.0:
More information about the llvm-commits
mailing list