[llvm] [AMDGPU] Handle llvm.amdgcn.pops.exiting.wave.id with calls (PR #98614)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 12 03:38:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/98614.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (+1-3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll (+98-32) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 326d0fa58dd15..2fe9cd242ff19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -290,14 +290,12 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
         case AMDGPU::SRC_PRIVATE_BASE:
         case AMDGPU::SRC_PRIVATE_LIMIT_LO:
         case AMDGPU::SRC_PRIVATE_LIMIT:
+        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
         case AMDGPU::SGPR_NULL:
         case AMDGPU::SGPR_NULL64:
         case AMDGPU::MODE:
           continue;
 
-        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
-          llvm_unreachable("src_pops_exiting_wave_id should not be used");
-
         case AMDGPU::NoRegister:
           assert(MI.isDebugInstr() &&
                  "Instruction uses invalid noreg register");
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index f3c5ac757e22b..dbe95a8091932 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,SDAG,GFX9-SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,SDAG,GFX10-SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
+
+declare void @foo(i32)
 
 define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
 ; SDAG-LABEL: test:
@@ -34,35 +36,25 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
 }
 
 define amdgpu_ps void @test_loop() {
-; SDAG-LABEL: test_loop:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:  .LBB1_1: ; %loop
-; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
-; SDAG-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
-; SDAG-NEXT:    s_cmp_eq_u32 s0, 0
-; SDAG-NEXT:    s_cbranch_scc1 .LBB1_1
-; SDAG-NEXT:  ; %bb.2: ; %exit
-; SDAG-NEXT:    s_endpgm
+; GFX9-LABEL: test_loop:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:  .LBB1_1: ; %loop
+; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-NEXT:    s_cmp_eq_u32 s0, 0
+; GFX9-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9-NEXT:  ; %bb.2: ; %exit
+; GFX9-NEXT:    s_endpgm
 ;
-; GFX9-GISEL-LABEL: test_loop:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:  .LBB1_1: ; %loop
-; GFX9-GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
-; GFX9-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
-; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
-; GFX9-GISEL-NEXT:  ; %bb.2: ; %exit
-; GFX9-GISEL-NEXT:    s_endpgm
-;
-; GFX10-GISEL-LABEL: test_loop:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:  .LBB1_1: ; %loop
-; GFX10-GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
-; GFX10-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
-; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
-; GFX10-GISEL-NEXT:  ; %bb.2: ; %exit
-; GFX10-GISEL-NEXT:    s_endpgm
+; GFX10-LABEL: test_loop:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:  .LBB1_1: ; %loop
+; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-NEXT:    s_cmp_eq_u32 s0, 0
+; GFX10-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX10-NEXT:  ; %bb.2: ; %exit
+; GFX10-NEXT:    s_endpgm
   br label %loop
 loop:
   %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
@@ -117,3 +109,77 @@ exit:
   %id = phi i32 [ %id1, %entry ], [ %id2, %body ]
   ret i32 %id
 }
+
+define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
+; GFX9-SDAG-LABEL: test_call:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-SDAG-NEXT:    s_mov_b32 s38, -1
+; GFX9-SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; GFX9-SDAG-NEXT:    s_add_u32 s36, s36, s2
+; GFX9-SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; GFX9-SDAG-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, foo@gotpcrel32@lo+4
+; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
+; GFX9-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-SDAG-NEXT:    s_mov_b32 s6, src_pops_exiting_wave_id
+; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GFX9-SDAG-NEXT:    s_mov_b64 s[8:9], 36
+; GFX9-SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-SDAG-NEXT:    s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: test_call:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-GISEL-NEXT:    s_mov_b32 s38, -1
+; GFX9-GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GFX9-GISEL-NEXT:    s_add_u32 s36, s36, s2
+; GFX9-GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GFX9-GISEL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, foo@gotpcrel32@lo+4
+; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
+; GFX9-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-GISEL-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GFX9-GISEL-NEXT:    s_mov_b64 s[8:9], 36
+; GFX9-GISEL-NEXT:    s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_call:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT:    s_mov_b32 s38, -1
+; GFX10-NEXT:    s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT:    s_add_u32 s36, s36, s2
+; GFX10-NEXT:    s_addc_u32 s37, s37, 0
+; GFX10-NEXT:    s_getpc_b64 s[0:1]
+; GFX10-NEXT:    s_add_u32 s0, s0, foo@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b64 s[8:9], 36
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX10-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX10-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
+  call void @foo(i32 %id)
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10-SDAG: {{.*}}

``````````

</details>


https://github.com/llvm/llvm-project/pull/98614


More information about the llvm-commits mailing list