[llvm] 2d39f5b - [AMDGPU] Allow use of TTMP registers in AMDGPUResourceUsageAnalysis

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 13 08:59:39 PDT 2023


Author: Jay Foad
Date: 2023-04-13T16:56:22+01:00
New Revision: 2d39f5b5cdcb87fe968001586ab7949939099cfe

URL: https://github.com/llvm/llvm-project/commit/2d39f5b5cdcb87fe968001586ab7949939099cfe
DIFF: https://github.com/llvm/llvm-project/commit/2d39f5b5cdcb87fe968001586ab7949939099cfe.diff

LOG: [AMDGPU] Allow use of TTMP registers in AMDGPUResourceUsageAnalysis

With architected SGPRs, workgroup IDs are passed into a compute shader
in TTMP registers. Allow for this in AMDGPUResourceUsageAnalysis instead
of failing an assertion.

Differential Revision: https://reviews.llvm.org/D148239

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
    llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 6069636c5a5bd..79fff01e8b6bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -338,11 +338,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           break;
         }
 
-        if (AMDGPU::SReg_32RegClass.contains(Reg) ||
-            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
+        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
           IsSGPR = true;
           Width = 1;
         } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
@@ -355,9 +353,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           IsSGPR = false;
           IsAGPR = true;
           Width = 1;
-        } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
+        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
           IsSGPR = true;
           Width = 2;
         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
@@ -377,9 +373,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           IsSGPR = false;
           IsAGPR = true;
           Width = 3;
-        } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
+        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
           IsSGPR = true;
           Width = 4;
         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
@@ -420,8 +414,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           IsAGPR = true;
           Width = 7;
         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
           IsSGPR = true;
           Width = 8;
         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
@@ -472,8 +464,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           IsAGPR = true;
           Width = 12;
         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
-          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
-                 "trap handler registers should not be used");
           IsSGPR = true;
           Width = 16;
         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
@@ -494,7 +484,12 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
           IsAGPR = true;
           Width = 32;
         } else {
-          llvm_unreachable("Unknown register class");
+          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_512RegClass.contains(Reg)) &&
+                 "Unknown register class");
         }
         unsigned HWReg = TRI.getHWRegIndex(Reg);
         int MaxUsed = HWReg + Width - 1;

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index 83ea07ab4d924..f67b0c9e0e00f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+
 define amdgpu_cs void @_amdgpu_cs_main() {
 ; GFX9-SDAG-LABEL: _amdgpu_cs_main:
 ; GFX9-SDAG:       ; %bb.0: ; %.entry
@@ -33,6 +34,53 @@ define amdgpu_cs void @_amdgpu_cs_main() {
   ret void
 }
 
+define amdgpu_cs void @caller() {
+; GFX9-SDAG-LABEL: caller:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_getpc_b64 s[8:9]
+; GFX9-SDAG-NEXT:    s_mov_b32 s8, s0
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT:    s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_add_u32 s8, s8, s0
+; GFX9-SDAG-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-SDAG-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, callee@gotpcrel32@lo+4
+; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GFX9-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: caller:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GFX9-GISEL-NEXT:    s_mov_b32 s8, s0
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT:    s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_add_u32 s8, s8, s0
+; GFX9-GISEL-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-GISEL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, callee@gotpcrel32@lo+4
+; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-GISEL-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  call amdgpu_gfx void @callee(i32 %idx)
+  ret void
+}
+
+declare amdgpu_gfx void @callee(i32)
+
 declare i32 @llvm.amdgcn.workgroup.id.x()
 declare i32 @llvm.amdgcn.workgroup.id.y()
 declare i32 @llvm.amdgcn.workgroup.id.z()


        


More information about the llvm-commits mailing list