[llvm] [AMDGPU] Support llvm.amdgcn.wave.id on gfx942/gfx950 (PR #164063)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 18 01:46:45 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Hongyu Chen (XChy)
<details>
<summary>Changes</summary>
A partial fix for #163835.
As the CDNA3 and CDNA4 documents state, TTMP11 is initialized with `{ 26'b0, wave_id_in_workgroup[5:0] }`. This patch supports this feature on gfx942/gfx950 by copying the wave ID from TTMP11.
---
Full diff: https://github.com/llvm/llvm-project/pull/164063.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+20-10)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+13-6)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll (+32)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ee466ca20bde3..cab9e1e63acbf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7549,17 +7549,27 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
MachineIRBuilder &B) const {
- // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
- if (!ST.hasArchitectedSGPRs())
- return false;
- LLT S32 = LLT::scalar(32);
Register DstReg = MI.getOperand(0).getReg();
- auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
- auto LSB = B.buildConstant(S32, 25);
- auto Width = B.buildConstant(S32, 5);
- B.buildUbfx(DstReg, TTMP8, LSB, Width);
- MI.eraseFromParent();
- return true;
+
+ // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+ if (ST.hasArchitectedSGPRs()) {
+ LLT S32 = LLT::scalar(32);
+ auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
+ auto LSB = B.buildConstant(S32, 25);
+ auto Width = B.buildConstant(S32, 5);
+ B.buildUbfx(DstReg, TTMP8, LSB, Width);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // GFX942/GFX950 has wave_id_in_workgroup in ttmp11
+ if (ST.hasGFX940Insts()) {
+ B.buildCopy(DstReg, Register(AMDGPU::TTMP11));
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
}
bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80e985d823746..7e45b29a6c630 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9617,14 +9617,21 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
}
SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
- // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
- if (!Subtarget->hasArchitectedSGPRs())
- return {};
SDLoc SL(Op);
MVT VT = MVT::i32;
- SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
- return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
- DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+ // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+ if (Subtarget->hasArchitectedSGPRs()) {
+ SDValue TTMP8 =
+ DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
+ return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
+ DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+ }
+
+ // GFX942/GFX950 has wave_id_in_workgroup in ttmp11.
+ if (Subtarget->hasGFX940Insts())
+ return DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP11, VT);
+
+ return {};
}
SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
index c597693d5a5f9..6270e41f603f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
@@ -3,6 +3,10 @@
; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=1 -new-reg-bank-select < %s 2>&1 | FileCheck -check-prefix=GFX9-GISEL-ERR %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 -global-isel=0 < %s | FileCheck -check-prefixes=GFX942 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX942 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefix=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefix=GFX1250 %s
@@ -19,6 +23,18 @@ define amdgpu_cs void @test_wave_id(ptr addrspace(1) %out) {
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
;
+; GFX942-LABEL: test_wave_id:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: v_mov_b32_e32 v2, ttmp11
+; GFX942-NEXT: global_store_dword v[0:1], v2, off
+; GFX942-NEXT: s_endpgm
+;
+; GFX950-LABEL: test_wave_id:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: v_mov_b32_e32 v2, ttmp11
+; GFX950-NEXT: global_store_dword v[0:1], v2, off
+; GFX950-NEXT: s_endpgm
+;
; GFX1200-LABEL: test_wave_id:
; GFX1200: ; %bb.0:
; GFX1200-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
@@ -49,6 +65,22 @@ define amdgpu_gfx void @test_wave_id_callable(ptr addrspace(1) %out) {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX942-LABEL: test_wave_id_callable:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v2, ttmp11
+; GFX942-NEXT: global_store_dword v[0:1], v2, off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_wave_id_callable:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v2, ttmp11
+; GFX950-NEXT: global_store_dword v[0:1], v2, off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1200-LABEL: test_wave_id_callable:
; GFX1200: ; %bb.0:
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
``````````
</details>
https://github.com/llvm/llvm-project/pull/164063
More information about the llvm-commits
mailing list