[PATCH] D88573: [SelectionDAG] Add check for BUILD_VECTOR in isKnownNeverNaN

Petar Avramovic via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 1 04:11:50 PDT 2020


Petar.Avramovic updated this revision to Diff 295523.
Petar.Avramovic edited the summary of this revision.
Petar.Avramovic added a comment.
Herald added a reviewer: ecnelises.

The patch also affects constants with vector type. Effectively, during legalization, fcanonicalize will no longer be inserted when the input is a vector constant.

  define amdgpu_kernel void @maxnum_v2f16_imm_b(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) {
  entry:
    %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
    %add = fadd <2 x half> %a.val, <half 9.0, half 8.0>
    %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %add, <2 x half> <half 4.0, half 3.0>) ;<half 4.0, half 3.0> will not get fcanonicalized during legalization after this patch
    store <2 x half> %r.val, <2 x half> addrspace(1)* %r
    ret void
  }

This won't be visible in the end result, since there is an fcanonicalize + build_vector combine in SITargetLowering::performFCanonicalizeCombine. The test changes are for non-constant build_vectors.
Added a comment to indicate that vector constants are handled below.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88573/new/

https://reviews.llvm.org/D88573

Files:
  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
  llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll


Index: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -146,15 +146,12 @@
 ; FIXME: Should be packed into 2 registers per argument?
 ; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
 ; GCN: s_waitcnt
-; GFX9-DAG: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX9-DAG: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX9-DAG: v_and_b32_e32 v1, 0xffff, v1
-; GFX9-DAG: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX9-DAG: v_pk_max_f16 v1, v1, 0
-; GFX9-DAG: v_pk_max_f16 v0, v6, 0
-; GFX9-DAG: v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
-; GFX9-DAG: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
-; GFX9: s_setpc_b64
+; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX9-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX9-DAG: v_pk_max_f16 v1, v1, v1 clamp
+; GFX9: v_mov_b32_e32 v0, v{{[0-9]+}}
+; GFX9-NEXT: s_setpc_b64
 define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
   %src0.ext = fpext <3 x half> %src0 to <3 x float>
   %src1.ext = fpext <3 x half> %src1 to <3 x float>
@@ -168,14 +165,12 @@
 
 ; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
 ; GCN: s_waitcnt
-; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_pk_max_f16 v1, v7, 0
-; GFX9-NEXT: v_pk_max_f16 v0, v6, 0
-; GFX9-NEXT: v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
-; GFX9-NEXT: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mov_b32_e32 v0, v6
+; GFX9-NEXT: v_mov_b32_e32 v1, v2
 ; GFX9-NEXT: s_setpc_b64
 define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
   %src0.ext = fpext <4 x half> %src0 to <4 x float>
Index: llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -236,9 +236,8 @@
 
 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_lo:
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
-; GFX9: v_pk_add_f16 [[A]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
-; GFX9: v_pk_max_f16 v1, v1, 0 neg_lo:[1,0]
-; GFX9: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] clamp{{$}}
 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
@@ -257,8 +256,7 @@
 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_hi:
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
-; GFX9: v_pk_max_f16 v1, v1, 0 neg_hi:[1,0]
-; GFX9: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_hi:[1,1] clamp{{$}}
 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4101,7 +4101,7 @@
   if (Depth >= MaxRecursionDepth)
     return false; // Limit search depth.
 
-  // TODO: Handle vectors.
+  // Vector constants are handled by checking operands of BUILD_VECTOR below.
   // If the value is a constant, we can obviously see if it is a NaN or not.
   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
     return !C->getValueAPF().isNaN() ||
@@ -4199,6 +4199,12 @@
   case ISD::EXTRACT_VECTOR_ELT: {
     return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
   }
+  case ISD::BUILD_VECTOR: {
+    for (const SDValue &Opnd : Op->ops())
+      if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+        return false;
+    return true;
+  }
   default:
     if (Opcode >= ISD::BUILTIN_OP_END ||
         Opcode == ISD::INTRINSIC_WO_CHAIN ||


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88573.295523.patch
Type: text/x-patch
Size: 5044 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201001/1be5e95a/attachment.bin>


More information about the llvm-commits mailing list