[llvm] 50082d6 - DAG: Fix widening of fptrunc_round vectors (#89918)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 07:21:46 PDT 2024
Author: Matt Arsenault
Date: 2024-04-24T16:21:40+02:00
New Revision: 50082d64e633b9baa918a209fc9105aa330b89fa
URL: https://github.com/llvm/llvm-project/commit/50082d64e633b9baa918a209fc9105aa330b89fa
DIFF: https://github.com/llvm/llvm-project/commit/50082d64e633b9baa918a209fc9105aa330b89fa.diff
LOG: DAG: Fix widening of fptrunc_round vectors (#89918)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0483f7c74f91a2..9c855e55855312 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -984,7 +984,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
- SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
+ SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8776d89f4c5bd9..985c9f16ab97cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4242,7 +4242,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::IS_FPCLASS:
- Res = WidenVecRes_IS_FPCLASS(N);
+ case ISD::FPTRUNC_ROUND:
+ Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
break;
case ISD::FLDEXP:
@@ -5004,7 +5005,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
-SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+/// Result and first source operand are different scalar types, but must have
+/// the same number of elements. There is an additional control argument which
+/// should be passed through unchanged.
+SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
SDValue FpValue = N->getOperand(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 4526efc0d8fa4f..b8c16d2ed3b2f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
ret void
}
-; FIXME
-; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
-; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
-; ret <3 x half> %res
-; }
+define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
+; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT: ; return to shader part epilog
+ %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
+ ret <3 x half> %res
+}
-; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
-; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
-; ret <3 x half> %res
-; }
+define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
+; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT: ; return to shader part epilog
+ %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
+ ret <3 x half> %res
+}
define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
More information about the llvm-commits mailing list