[llvm] 2d39f5b - [AMDGPU] Allow use of TTMP registers in AMDGPUResourceUsageAnalysis
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 13 08:59:39 PDT 2023
Author: Jay Foad
Date: 2023-04-13T16:56:22+01:00
New Revision: 2d39f5b5cdcb87fe968001586ab7949939099cfe
URL: https://github.com/llvm/llvm-project/commit/2d39f5b5cdcb87fe968001586ab7949939099cfe
DIFF: https://github.com/llvm/llvm-project/commit/2d39f5b5cdcb87fe968001586ab7949939099cfe.diff
LOG: [AMDGPU] Allow use of TTMP registers in AMDGPUResourceUsageAnalysis
With architected SGPRs, workgroup IDs are passed into a compute shader
in TTMP registers. Allow for this in AMDGPUResourceUsageAnalysis instead
of failing an assertion.
Differential Revision: https://reviews.llvm.org/D148239
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 6069636c5a5bd..79fff01e8b6bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -338,11 +338,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
break;
}
- if (AMDGPU::SReg_32RegClass.contains(Reg) ||
- AMDGPU::SReg_LO16RegClass.contains(Reg) ||
+ if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
@@ -355,9 +353,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
- "trap handler registers should not be used");
+ } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
IsSGPR = true;
Width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
@@ -377,9 +373,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 3;
- } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
- "trap handler registers should not be used");
+ } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
IsSGPR = true;
Width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
@@ -420,8 +414,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 7;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 8;
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
@@ -472,8 +464,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 12;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
@@ -494,7 +484,12 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 32;
} else {
- llvm_unreachable("Unknown register class");
+ assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+ AMDGPU::TTMP_64RegClass.contains(Reg) ||
+ AMDGPU::TTMP_128RegClass.contains(Reg) ||
+ AMDGPU::TTMP_256RegClass.contains(Reg) ||
+ AMDGPU::TTMP_512RegClass.contains(Reg)) &&
+ "Unknown register class");
}
unsigned HWReg = TRI.getHWRegIndex(Reg);
int MaxUsed = HWReg + Width - 1;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index 83ea07ab4d924..f67b0c9e0e00f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+
define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
; GFX9-SDAG: ; %bb.0: ; %.entry
@@ -33,6 +34,53 @@ define amdgpu_cs void @_amdgpu_cs_main() {
ret void
}
+define amdgpu_cs void @caller() {
+; GFX9-SDAG-LABEL: caller:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b32 s8, s0
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0
+; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1]
+; GFX9-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
+; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: caller:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b32 s8, s0
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0
+; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1]
+; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4
+; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ call amdgpu_gfx void @callee(i32 %idx)
+ ret void
+}
+
+declare amdgpu_gfx void @callee(i32)
+
declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
More information about the llvm-commits mailing list