[llvm] ada6aa3 - AMDGPU: Fold undef rcp to qnan
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 4 15:49:44 PDT 2022
Author: Matt Arsenault
Date: 2022-11-04T15:49:37-07:00
New Revision: ada6aa3f5c9693130747549f99b6bb27cff58f2f
URL: https://github.com/llvm/llvm-project/commit/ada6aa3f5c9693130747549f99b6bb27cff58f2f
DIFF: https://github.com/llvm/llvm-project/commit/ada6aa3f5c9693130747549f99b6bb27cff58f2f.diff
LOG: AMDGPU: Fold undef rcp to qnan
This matches the behavior in InstCombine, as well as the behavior for fdiv.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
llvm/test/CodeGen/AMDGPU/select-undef.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 347b0ee9d3b4..64ebf1d2d8b6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9991,8 +9991,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
- if (N0.isUndef())
- return N0;
+ if (N0.isUndef()) {
+ return DCI.DAG.getConstantFP(
+ APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N),
+ VT);
+ }
if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
N0.getOpcode() == ISD::SINT_TO_FP)) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index db6ee7bd0aeb..929f935f6910 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -7,7 +7,9 @@ declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0
; FUNC-LABEL: {{^}}rcp_undef_f32:
-; SI-NOT: v_rcp_f32
+; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
+; SI-NOT: [[NAN]]
+; SI: buffer_store_dword [[NAN]]
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.f32(float undef)
store float %rcp, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll
index f02cd3fc5e4e..81deec1e0dbb 100644
--- a/llvm/test/CodeGen/AMDGPU/select-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}select_undef_lhs:
; GCN: s_waitcnt
@@ -6,8 +6,7 @@
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_lhs(float %val, i1 %cond) {
- %undef = call float @llvm.amdgcn.rcp.f32(float undef)
- %sel = select i1 %cond, float %undef, float %val
+ %sel = select i1 %cond, float undef, float %val
ret float %sel
}
@@ -17,8 +16,7 @@ define float @select_undef_lhs(float %val, i1 %cond) {
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_rhs(float %val, i1 %cond) {
- %undef = call float @llvm.amdgcn.rcp.f32(float undef)
- %sel = select i1 %cond, float %val, float %undef
+ %sel = select i1 %cond, float %val, float undef
ret float %sel
}
More information about the llvm-commits mailing list