[llvm] DAG: Implement promotion for strict_fpextend (PR #74310)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 04:20:54 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
The test is a placeholder; it will be merged into the existing test once the remaining bugs affecting targets with illegal f16 are fixed.
---
Full diff: https://github.com/llvm/llvm-project/pull/74310.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+23)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1)
- (added) llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll (+43)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 630aa4a07d7b9..f77b3afccfb8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT_SAT:
R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::STRICT_FP_EXTEND:
+ R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo);
+ break;
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
@@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
}
+SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1);
+
+ SDValue Op = GetPromotedFloat(N->getOperand(1));
+ EVT VT = N->getValueType(0);
+
+ // Desired VT is same as promoted type. Use promoted float directly.
+ if (VT == Op->getValueType(0)) {
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ return Op;
+ }
+
+ // Else, extend the promoted float value to the desired VT.
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(),
+ N->getOperand(0), Op);
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
// Promote the float operands used for comparison. The true- and false-
// operands have the same type as the result and are promoted, if needed, by
// PromoteFloatRes_SELECT_CC
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e9bd54089d062..4c7ddd4aea9e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -712,6 +712,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
new file mode 100644
index 0000000000000..a74f6bfd564bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX8 %s
+
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
+
+define float @v_constrained_fpext_f16_to_f32(ptr addrspace(1) %ptr) #0 {
+; GFX8-LABEL: v_constrained_fpext_f16_to_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s6
+; GFX8-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %val = load half, ptr addrspace(1) %ptr
+ %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict")
+ ret float %result
+}
+
+define <2 x float> @v_constrained_fpext_v2f16_to_v2f32(ptr addrspace(1) %ptr) #0 {
+; GFX8-LABEL: v_constrained_fpext_v2f16_to_v2f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s4, s6
+; GFX8-NEXT: s_mov_b32 s5, s6
+; GFX8-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %val = load <2 x half>, ptr addrspace(1) %ptr
+ %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict")
+ ret <2 x float> %result
+}
+
+attributes #0 = { strictfp }
``````````
</details>
https://github.com/llvm/llvm-project/pull/74310
More information about the llvm-commits mailing list