[llvm] r276823 - AMDGPU: Use rcp for fdiv 1, x with fpmath metadata
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 26 16:25:44 PDT 2016
Author: arsenm
Date: Tue Jul 26 18:25:44 2016
New Revision: 276823
URL: http://llvm.org/viewvc/llvm-project?rev=276823&view=rev
Log:
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata
Using rcp should be OK for safe math usually, so this
should not be replacing the original fdiv.
Removed:
llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll
llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp?rev=276823&r1=276822&r2=276823&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp Tue Jul 26 18:25:44 2016
@@ -76,7 +76,7 @@ static bool shouldKeepFDivF32(Value *Num
return false;
// Reciprocal f32 is handled separately without denormals.
- return UnsafeDiv && CNum->isExactlyValue(+1.0);
+ return UnsafeDiv || CNum->isExactlyValue(+1.0);
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll?rev=276823&r1=276822&r2=276823&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll Tue Jul 26 18:25:44 2016
@@ -45,6 +45,7 @@ define void @fdiv_fpmath(float addrspace
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
+; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
@@ -54,6 +55,9 @@ define void @rcp_fdiv_fpmath(float addrs
%no.md = fdiv float 1.0, %x
store volatile float %no.md, float addrspace(1)* %out
+ %md.25ulp = fdiv float 1.0, %x, !fpmath !0
+ store volatile float %md.25ulp, float addrspace(1)* %out
+
%md.half.ulp = fdiv float 1.0, %x, !fpmath !1
store volatile float %md.half.ulp, float addrspace(1)* %out
@@ -146,13 +150,13 @@ define void @rcp_fdiv_fpmath_vector(<2 x
; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0
; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
-; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %[[X1]]), !fpmath !0
+; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0
; CHECK: store volatile <2 x float> %arcp.25ulp
; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0
; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
-; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %[[X1]]), !fpmath !0
+; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
%no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
Modified: llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll?rev=276823&r1=276822&r2=276823&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll Tue Jul 26 18:25:44 2016
@@ -1,11 +1,96 @@
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: Evergreen only ever does unsafe fp math.
; FUNC-LABEL: {{^}}rcp_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
; EG: RECIP_IEEE
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv fast float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %rcp = fdiv arcp float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
+ %rcp = fdiv float 1.0, %src, !fpmath !0
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
+; GCN: s_load_dword [[SRC:s[0-9]+]]
+; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
+; GCN: buffer_store_dword [[RCP]]
+
+; EG: RECIP_IEEE
+define void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %src.fabs = call float @llvm.fabs.f32(float %src)
+ %rcp = fdiv float 1.0, %src.fabs
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: fneg folded into constant 1
+; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
+define void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
+ %src.fabs = call float @llvm.fabs.f32(float %src)
+ %src.fabs.fneg = fsub float -0.0, %src.fabs
+ %rcp = fdiv float 1.0, %src.fabs.fneg
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+
+declare float @llvm.fabs.f32(float) #1
+
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "unsafe-fp-math"="true" }
+
+!0 = !{float 2.500000e+00}
Removed: llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll?rev=276822&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reciprocal.ll (removed)
@@ -1,13 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define amdgpu_ps void @test(<4 x float> inreg %reg0) {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = fdiv float 1.0, %r0
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
- ret void
-}
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
More information about the llvm-commits
mailing list