[llvm-branch-commits] [llvm] 94375d1 - AMDGPU: Remove v_rsq_f64 patterns

Thu Jan 21 07:55:58 PST 2021

Author: Matt Arsenault
Date: 2021-01-21T10:51:36-05:00
New Revision: 94375d1083ccc9187c2502894f1dad62d9dd92b9

URL: https://github.com/llvm/llvm-project/commit/94375d1083ccc9187c2502894f1dad62d9dd92b9
DIFF: https://github.com/llvm/llvm-project/commit/94375d1083ccc9187c2502894f1dad62d9dd92b9.diff

LOG: AMDGPU: Remove v_rsq_f64 patterns

This isn't accurate enough without correction

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
    llvm/test/CodeGen/AMDGPU/rsq.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8baa803cd658..732e9d76127e 100644

--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -786,11 +786,9 @@ def : Pat <
 
 let OtherPredicates = [UnsafeFPMath] in {
 
-//defm : RsqPat<V_RSQ_F64_e32, f64>;
 //defm : RsqPat<V_RSQ_F32_e32, f32>;
 
 def : RsqPat<V_RSQ_F32_e32, f32>;
-def : RsqPat<V_RSQ_F64_e32, f64>;
 
 // Convert (x - floor(x)) to fract(x)
 def : GCNPat <

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index 06bf0a903e34..db6ee7bd0aeb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.amdgcn.rcp.f32(float) #0
 declare double @llvm.amdgcn.rcp.f64(double) #0
@@ -132,8 +132,8 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, doubl
 }
 
 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
-; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
-; SI-NOT: [[RESULT]]
+; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
   %sqrt = call double @llvm.sqrt.f64(double %src)

diff  --git a/llvm/test/CodeGen/AMDGPU/rsq.ll b/llvm/test/CodeGen/AMDGPU/rsq.ll
index a80eede42d5f..a6325388c3fe 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.ll
@@ -17,8 +17,8 @@ define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrs
 }
 
 ; SI-LABEL: {{^}}rsq_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64_e32
+; SI: v_sqrt_f64
+; SI: v_rcp_f64
 ; SI: s_endpgm
 define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
   %val = load double, double addrspace(1)* %in, align 4
@@ -97,8 +97,8 @@ define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float a
 
 ; SI-UNSAFE: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
 ; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], [[VAL]]
-; SI-UNSAFE: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], [[VAL]]
-; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RSQ]], 1.0
+; SI-UNSAFE: v_rcp_f64_e32 [[RCP:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RCP]], 1.0
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
@@ -135,8 +135,7 @@ define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, flo
 
 ; SI-UNSAFE: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
 ; SI-UNSAFE-DAG: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -[[VAL]]
-; SI-UNSAFE-DAG: v_xor_b32_e32 v[[HI:[0-9]+]], 0x80000000, v{{[0-9]+}}
-; SI-UNSAFE: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+}}:[[HI]]{{\]}}
+; SI-UNSAFE: v_rcp_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
 ; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RSQ]], 1.0
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64