[llvm] 597086c - DAG: Implement promotion for strict_fp_round (#74332)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 17:44:24 PST 2024
Author: Matt Arsenault
Date: 2024-01-05T08:44:19+07:00
New Revision: 597086c60959dd5b3c032552e8b42dd1d053f233
URL: https://github.com/llvm/llvm-project/commit/597086c60959dd5b3c032552e8b42dd1d053f233
DIFF: https://github.com/llvm/llvm-project/commit/597086c60959dd5b3c032552e8b42dd1d053f233.diff
LOG: DAG: Implement promotion for strict_fp_round (#74332)
Needs an AMDGPU hack to get the selection to work. The ordinary
variant is custom lowered through an almost equivalent target node
that would need a strict variant for additional known bits
optimizations.
Added:
Modified:
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index d4fc9d8a96db2b..22360353790dbc 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -614,6 +614,12 @@ def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP",
SDTIntToFPOp, [SDNPHasChain]>;
def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP",
SDTIntToFPOp, [SDNPHasChain]>;
+
+def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP",
+ SDTIntToFPOp, [SDNPHasChain]>;
+def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16",
+ SDTFPToIntOp, [SDNPHasChain]>;
+
def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>;
def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>;
@@ -1576,6 +1582,13 @@ def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred),
[(strict_fsetccs node:$lhs, node:$rhs, node:$pred),
(setcc node:$lhs, node:$rhs, node:$pred)]>;
+def any_f16_to_fp : PatFrags<(ops node:$src),
+ [(f16_to_fp node:$src),
+ (strict_f16_to_fp node:$src)]>;
+def any_fp_to_f16 : PatFrags<(ops node:$src),
+ [(fp_to_f16 node:$src),
+ (strict_fp_to_f16 node:$src)]>;
+
multiclass binary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 65919a64b80653..6e0e1e23419bec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2181,6 +2181,24 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
report_fatal_error("Attempt at an invalid promotion-related conversion");
}
+static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16)
+ return ISD::STRICT_FP16_TO_FP;
+
+ if (RetVT == MVT::f16)
+ return ISD::STRICT_FP_TO_FP16;
+
+ if (OpVT == MVT::bf16) {
+ // TODO: return ISD::STRICT_BF16_TO_FP;
+ }
+
+ if (RetVT == MVT::bf16) {
+ // TODO: return ISD::STRICT_FP_TO_BF16;
+ }
+
+ report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
@@ -2281,7 +2299,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N,
unsigned OpNo) {
- assert(OpNo == 1);
+ assert(OpNo == 1 && "Promoting unpromotable operand");
SDValue Op = GetPromotedFloat(N->getOperand(1));
EVT VT = N->getValueType(0);
@@ -2416,6 +2434,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
+ case ISD::STRICT_FP_ROUND:
+ R = PromoteFloatRes_STRICT_FP_ROUND(N);
+ break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
@@ -2621,6 +2642,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
}
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_STRICT_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = Op->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ // Round promoted float to desired precision
+ SDValue Round = DAG.getNode(GetPromotionOpcodeStrict(OpVT, VT), DL,
+ DAG.getVTList(IVT, MVT::Other), Chain, Op);
+ // Promote it back to the legal output type
+ SDValue Res =
+ DAG.getNode(GetPromotionOpcodeStrict(VT, NVT), DL,
+ DAG.getVTList(NVT, MVT::Other), Round.getValue(1), Round);
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 3d21bd22e6ef5d..92598d8856193d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -165,7 +165,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16:
Res = PromoteIntRes_FP_TO_FP16_BF16(N);
break;
-
+ case ISD::STRICT_FP_TO_FP16:
+ Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N);
+ break;
case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break;
case ISD::AND:
@@ -787,6 +789,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_STRICT_FP_TO_FP16_BF16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ N->getOperand(0), N->getOperand(1));
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -1804,6 +1816,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FP16_TO_FP:
case ISD::VP_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::STRICT_FP16_TO_FP:
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 84b1b2c71fd0be..09f0bca8b8611e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+ SDValue PromoteIntRes_STRICT_FP_TO_FP16_BF16(SDNode *N);
SDValue PromoteIntRes_XRINT(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
@@ -699,6 +700,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteFloatRes_ExpOp(SDNode *N);
SDValue PromoteFloatRes_FFREXP(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+ SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e8c4d805dbba9e..1158d16baa1bac 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1097,7 +1097,7 @@ def : Pat <
multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
// f16_to_fp patterns
def : GCNPat <
- (f32 (f16_to_fp i32:$src0)),
+ (f32 (any_f16_to_fp i32:$src0)),
(cvt_f32_f16_inst_e64 SRCMODS.NONE, $src0)
>;
@@ -1151,6 +1151,13 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
(f16 (uint_to_fp i32:$src)),
(cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
>;
+
+ // This is only used on targets without half support
+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
+ def : GCNPat <
+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
+ >;
}
let SubtargetPredicate = NotHasTrue16BitInsts in
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
index 0339fca4d56cf6..8a3647a9b6e939 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
@@ -3,6 +3,9 @@
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
+declare float @llvm.fabs.f32(float)
define float @v_constrained_fpext_f16_to_f32(ptr addrspace(1) %ptr) #0 {
; GFX7-LABEL: v_constrained_fpext_f16_to_f32:
@@ -40,4 +43,68 @@ define <2 x float> @v_constrained_fpext_v2f16_to_v2f32(ptr addrspace(1) %ptr) #0
ret <2 x float> %result
}
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg, ptr addrspace(1) %ptr) #0 {
+; GFX7-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+ %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret void
+}
+
+define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x float> %arg, ptr addrspace(1) %ptr) #0 {
+; GFX7-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: s_mov_b32 s4, s6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: s_mov_b32 s5, s6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+ %result = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ store <2 x half> %result, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_fneg(float %arg, ptr addrspace(1) %ptr) #0 {
+; GFX7-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_fneg:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+ %neg.arg = fneg float %arg
+ %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret void
+}
+
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_fabs(float %arg, ptr addrspace(1) %ptr) #0 {
+; GFX7-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e64 v0, |v0|
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+ %abs.arg = call float @llvm.fabs.f32(float %arg)
+ %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret void
+}
+
attributes #0 = { strictfp }
More information about the llvm-commits
mailing list