[llvm] 52c44a4 - AMDGPU: Modernize sqrt f64 test

Thu Dec 22 10:01:47 PST 2022

Author: Matt Arsenault
Date: 2022-12-22T13:01:41-05:00
New Revision: 52c44a441c25a4333e1235095bb73c5115856125

URL: https://github.com/llvm/llvm-project/commit/52c44a441c25a4333e1235095bb73c5115856125
DIFF: https://github.com/llvm/llvm-project/commit/52c44a441c25a4333e1235095bb73c5115856125.diff

LOG: AMDGPU: Modernize sqrt f64 test

Use the readfirstlane hack for the scalar cases as a hack to
combine globalisel and sdag tests. gfx6 stores are a bit broken
in globalisel, and scalar returns are totally broken in sdag.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index a53d3956522a..dbf38717c5c1 100644

--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1,26 +1,123 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; FUNC-LABEL: {{^}}v_safe_fsqrt_f64:
-; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_safe_fsqrt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
-  %r0 = load double, ptr addrspace(1) %in
-  %r1 = call double @llvm.sqrt.f64(double %r0)
-  store double %r1, ptr addrspace(1) %out
-  ret void
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define double @v_sqrt_f64(double %x) {
+; GCN-LABEL: v_sqrt_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_fneg(double %x) {
+; GCN-LABEL: v_sqrt_f64_fneg:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e64 v[0:1], -v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %x.neg = fneg double %x
+  %result = call double @llvm.sqrt.f64(double %x.neg)
+  ret double %result
+}
+
+define double @v_sqrt_f64_fabs(double %x) {
+; GCN-LABEL: v_sqrt_f64_fabs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e64 v[0:1], |v[0:1]|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %x.fabs = call double @llvm.fabs.f64(double %x)
+  %result = call double @llvm.sqrt.f64(double %x.fabs)
+  ret double %result
+}
+
+define double @v_sqrt_f64_fneg_fabs(double %x) {
+; GCN-LABEL: v_sqrt_f64_fneg_fabs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e64 v[0:1], -|v[0:1]|
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %x.fabs = call double @llvm.fabs.f64(double %x)
+  %x.fabs.neg = fneg double %x.fabs
+  %result = call double @llvm.sqrt.f64(double %x.fabs.neg)
+  ret double %result
+}
+
+define double @v_sqrt_f64_ninf(double %x) {
+; GCN-LABEL: v_sqrt_f64_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call ninf double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" {
+; GCN-LABEL: v_sqrt_f64_no_infs_attribute:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call ninf double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define double @v_sqrt_f64_nnan(double %x) {
+; GCN-LABEL: v_sqrt_f64_nnan:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], v[0:1]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call nnan double @llvm.sqrt.f64(double %x)
+  ret double %result
+}
+
+define amdgpu_ps <2 x i32> @s_sqrt_f64(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_readfirstlane_b32 s0, v0
+; GCN-NEXT:    v_readfirstlane_b32 s1, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %result = call double @llvm.sqrt.f64(double %x)
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
 }
 
-; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f64:
-; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_unsafe_fsqrt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
-  %r0 = load double, ptr addrspace(1) %in
-  %r1 = call double @llvm.sqrt.f64(double %r0)
-  store double %r1, ptr addrspace(1) %out
-  ret void
+define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
+; GCN-LABEL: s_sqrt_f64_ninf:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sqrt_f64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_readfirstlane_b32 s0, v0
+; GCN-NEXT:    v_readfirstlane_b32 s1, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %result = call ninf double @llvm.sqrt.f64(double %x)
+  %cast = bitcast double %result to <2 x i32>
+  %cast.0 = extractelement <2 x i32> %cast, i32 0
+  %cast.1 = extractelement <2 x i32> %cast, i32 1
+  %lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
+  %lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
+  %insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
+  %insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
+  ret <2 x i32> %insert.1
 }
 
-declare double @llvm.sqrt.f64(double %Val) #0
+declare double @llvm.fabs.f64(double) #0
+declare double @llvm.sqrt.f64(double) #0
+declare i32 @llvm.amdgcn.readfirstlane(i32) #1
 
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #1 = { convergent nounwind willreturn memory(none) }