[PATCH] D136085: AMDGPU: Fold undef rcp to qnan

Mon Oct 17 08:05:50 PDT 2022

arsenm created this revision.
arsenm added a reviewer: AMDGPU.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

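Fold rcp of an undef operand to a quiet NaN constant instead of
propagating undef. This matches the behavior in instcombine, and the
existing handling of fdiv.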


https://reviews.llvm.org/D136085

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
  llvm/test/CodeGen/AMDGPU/select-undef.ll


Index: llvm/test/CodeGen/AMDGPU/select-undef.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/select-undef.ll
+++ llvm/test/CodeGen/AMDGPU/select-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}select_undef_lhs:
 ; GCN: s_waitcnt
@@ -6,8 +6,7 @@
 ; GCN-NOT: v_cndmask
 ; GCN-NEXT: s_setpc_b64
 define float @select_undef_lhs(float %val, i1 %cond) {
-  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
-  %sel = select i1 %cond, float %undef, float %val
+  %sel = select i1 %cond, float undef, float %val
   ret float %sel
 }
 
@@ -17,8 +16,7 @@
 ; GCN-NOT: v_cndmask
 ; GCN-NEXT: s_setpc_b64
 define float @select_undef_rhs(float %val, i1 %cond) {
-  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
-  %sel = select i1 %cond, float %val, float %undef
+  %sel = select i1 %cond, float %val, float undef
   ret float %sel
 }
 
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -7,7 +7,9 @@
 declare float @llvm.sqrt.f32(float) #0
 
 ; FUNC-LABEL: {{^}}rcp_undef_f32:
-; SI-NOT: v_rcp_f32
+; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
+; SI-NOT: [[NAN]]
+; SI: buffer_store_dword [[NAN]]
 define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
   %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
   store float %rcp, float addrspace(1)* %out, align 4
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9988,8 +9988,11 @@
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
 
-  if (N0.isUndef())
-    return N0;
+  if (N0.isUndef()) {
+    return DCI.DAG.getConstantFP(
+        APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N),
+        VT);
+  }
 
   if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
                          N0.getOpcode() == ISD::SINT_TO_FP)) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D136085.468207.patch
Type: text/x-patch
Size: 2327 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221017/3762d19a/attachment-0001.bin>