[llvm] 50082d6 - DAG: Fix widening of fptrunc_round vectors (#89918)

Wed Apr 24 07:21:46 PDT 2024

Author: Matt Arsenault
Date: 2024-04-24T16:21:40+02:00
New Revision: 50082d64e633b9baa918a209fc9105aa330b89fa

URL: https://github.com/llvm/llvm-project/commit/50082d64e633b9baa918a209fc9105aa330b89fa
DIFF: https://github.com/llvm/llvm-project/commit/50082d64e633b9baa918a209fc9105aa330b89fa.diff

LOG: DAG: Fix widening of fptrunc_round vectors (#89918)

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0483f7c74f91a2..9c855e55855312 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -984,7 +984,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
   SDValue WidenVecRes_XRINT(SDNode *N);
   SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
-  SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
+  SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N);
   SDValue WidenVecRes_ExpOp(SDNode *N);
   SDValue WidenVecRes_Unary(SDNode *N);
   SDValue WidenVecRes_InregOp(SDNode *N);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8776d89f4c5bd9..985c9f16ab97cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4242,7 +4242,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     break;
 
   case ISD::IS_FPCLASS:
-    Res = WidenVecRes_IS_FPCLASS(N);
+  case ISD::FPTRUNC_ROUND:
+    Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
     break;
 
   case ISD::FLDEXP:
@@ -5004,7 +5005,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
   return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+/// Result and first source operand are 
diff erent scalar types, but must have
+/// the same number of elements. There is an additional control argument which
+/// should be passed through unchanged.
+SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
   SDValue FpValue = N->getOperand(0);
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 4526efc0d8fa4f..b8c16d2ed3b2f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
   ret void
 }
 
-; FIXME
-; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
-;   %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
-;   ret <3 x half> %res
-; }
+define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
+; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT:    ; return to shader part epilog
+  %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
+  ret <3 x half> %res
+}
 
-; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
-;   %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
-;   ret <3 x half> %res
-; }
+define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
+; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT:    ; return to shader part epilog
+  %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
+  ret <3 x half> %res
+}
 
 define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) {
 ; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward: