[llvm] r338376 - AMDGPU: Fold undef fcanonicalize to qNaN

Tue Jul 31 06:34:31 PDT 2018

Author: arsenm
Date: Tue Jul 31 06:34:31 2018
New Revision: 338376

URL: http://llvm.org/viewvc/llvm-project?rev=338376&view=rev
Log:
AMDGPU: Fold undef fcanonicalize to qNaN

We could choose a free 0 for this, but folding to a quiet NaN
matches the behavior for fmul undef, 1.0. Also,
a NaN result is more useful for folding the operations that use it,
although if it is not eliminated it is more expensive
in terms of code size.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=338376&r1=338375&r2=338376&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jul 31 06:34:31 2018
@@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanoni
   SDNode *N,
   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
-  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+  SDValue N0 = N->getOperand(0);
 
+  // fcanonicalize undef -> qnan
+  if (N0.isUndef()) {
+    EVT VT = N->getValueType(0);
+    APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+    return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+  }
+
+  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
   if (!CFP) {
     SDValue N0 = N->getOperand(0);
     EVT VT = N0.getValueType().getScalarType();
@@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanoni
       return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
   }
 
-  return N->getOperand(0);
+  return N0;
 }
 
 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll?rev=338376&r1=338375&r2=338376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll Tue Jul 31 06:34:31 2018
@@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canoni
   ret void
 }
 
+; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+  %canonicalized = call float @llvm.canonicalize.f32(float undef)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: buffer_store_dword [[REG]]