[llvm] 5beb9a0 - AMDGPU: Respect compute ABI attributes with unknown OS

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 13 17:55:53 PDT 2021


Author: Matt Arsenault
Date: 2021-08-13T20:44:46-04:00
New Revision: 5beb9a0e6aec4a4901229377d8cb9e6115956446

URL: https://github.com/llvm/llvm-project/commit/5beb9a0e6aec4a4901229377d8cb9e6115956446
DIFF: https://github.com/llvm/llvm-project/commit/5beb9a0e6aec4a4901229377d8cb9e6115956446.diff

LOG: AMDGPU: Respect compute ABI attributes with unknown OS

Unfortunately Mesa is still using amdgcn-- as the triple for OpenGL,
so we still have the awkward unknown OS case to deal with. Previously
if the HSA ABI intrinsics appeared, we would not add the ABI
registers to the function. We would emit an error later, but we still
need to produce some compile result. Start adding the registers to any
compute function, regardless of the OS. This keeps the internal state
more consistent, and will help avoid numerous test crashes in a future
patch which starts assuming the ABI inputs are present on functions by
default.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/test/CodeGen/AMDGPU/trap-abis.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 85cfe36df16aa..76bec48d77f19 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -151,10 +151,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   }
 
   bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
-  if (isAmdHsaOrMesa) {
-    if (!ST.enableFlatScratch())
-      PrivateSegmentBuffer = true;
+  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
+    PrivateSegmentBuffer = true;
+  else if (ST.isMesaGfxShader(F))
+    ImplicitBufferPtr = true;
+
+  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+    KernargSegmentPtr = true;
 
+  if (!AMDGPU::isGraphics(CC)) {
     if (UseFixedABI) {
       DispatchPtr = true;
       QueuePtr = true;
@@ -171,13 +176,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
       if (F.hasFnAttribute("amdgpu-dispatch-id"))
         DispatchID = true;
     }
-  } else if (ST.isMesaGfxShader(F)) {
-    ImplicitBufferPtr = true;
   }
 
-  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
-    KernargSegmentPtr = true;
-
   // TODO: This could be refined a lot. The attribute is a poor way of
   // detecting calls or stack objects that may require it before argument
   // lowering.

diff  --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 4fe18cb5608a7..d3f6b7c9539db 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -18,7 +18,7 @@ declare void @llvm.debugtrap() #1
 define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
 ; NOHSA-TRAP-GFX900-V2-LABEL: trap:
 ; NOHSA-TRAP-GFX900-V2:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V2-NEXT:    s_waitcnt lgkmcnt(0)
@@ -28,7 +28,7 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
 ;
 ; NOHSA-TRAP-GFX900-V3-LABEL: trap:
 ; NOHSA-TRAP-GFX900-V3:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
@@ -38,7 +38,7 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
 ;
 ; NOHSA-TRAP-GFX900-V4-LABEL: trap:
 ; NOHSA-TRAP-GFX900-V4:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
@@ -357,7 +357,7 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
 define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr {
 ; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap:
 ; NOHSA-TRAP-GFX900-V2:       ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V2-NEXT:    s_waitcnt lgkmcnt(0)
 ; NOHSA-TRAP-GFX900-V2-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -375,7 +375,7 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ;
 ; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
 ; NOHSA-TRAP-GFX900-V3:       ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
 ; NOHSA-TRAP-GFX900-V3-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -393,7 +393,7 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %
 ;
 ; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
 ; NOHSA-TRAP-GFX900-V4:       ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
 ; NOHSA-TRAP-GFX900-V4-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -805,7 +805,7 @@ ret:
 define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) {
 ; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:
 ; NOHSA-TRAP-GFX900-V2:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V2-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V2-NEXT:    v_mov_b32_e32 v2, 2
@@ -818,7 +818,7 @@ define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0)
 ;
 ; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:
 ; NOHSA-TRAP-GFX900-V3:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v2, 2
@@ -831,7 +831,7 @@ define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0)
 ;
 ; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:
 ; NOHSA-TRAP-GFX900-V4:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
 ; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v2, 2


        


More information about the llvm-commits mailing list