[llvm] r338376 - AMDGPU: Fold undef fcanonicalize to qNaN
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 31 06:34:31 PDT 2018
Author: arsenm
Date: Tue Jul 31 06:34:31 2018
New Revision: 338376
URL: http://llvm.org/viewvc/llvm-project?rev=338376&view=rev
Log:
AMDGPU: Fold undef fcanonicalize to qNaN
We could choose a free 0 for this, but a qNaN
matches the behavior of fmul undef, 1.0. Also,
a NaN is more useful for folding into the operations
that use it, although if it is not eliminated it is
more expensive in terms of code size.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=338376&r1=338375&r2=338376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jul 31 06:34:31 2018
@@ -6845,8 +6845,16 @@ SDValue SITargetLowering::performFCanoni
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+ SDValue N0 = N->getOperand(0);
+ // fcanonicalize undef -> qnan
+ if (N0.isUndef()) {
+ EVT VT = N->getValueType(0);
+ APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+ return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+ }
+
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
if (!CFP) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType().getScalarType();
@@ -6899,7 +6907,7 @@ SDValue SITargetLowering::performFCanoni
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N->getOperand(0);
+ return N0;
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll?rev=338376&r1=338375&r2=338376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.ll Tue Jul 31 06:34:31 2018
@@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canoni
ret void
}
+; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+ %canonicalized = call float @llvm.canonicalize.f32(float undef)
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
More information about the llvm-commits
mailing list