[llvm] [AMDGPU] Support llvm.amdgcn.wave.id on gfx942/gfx950 (PR #164063)

via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 18 01:46:45 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Hongyu Chen (XChy)

<details>
<summary>Changes</summary>

A partial fix of #<!-- -->163835.
As the CDNA3 and CDNA4 documents state, TTMP11 is initialized with `{ 26'b0, wave_id_in_workgroup[5:0] }`. This patch adds support for `llvm.amdgcn.wave.id` on gfx942/gfx950 by copying the wave ID from TTMP11.

---
Full diff: https://github.com/llvm/llvm-project/pull/164063.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+20-10) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+13-6) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll (+32) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ee466ca20bde3..cab9e1e63acbf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7549,17 +7549,27 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
 
 bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
                                          MachineIRBuilder &B) const {
-  // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
-  if (!ST.hasArchitectedSGPRs())
-    return false;
-  LLT S32 = LLT::scalar(32);
   Register DstReg = MI.getOperand(0).getReg();
-  auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
-  auto LSB = B.buildConstant(S32, 25);
-  auto Width = B.buildConstant(S32, 5);
-  B.buildUbfx(DstReg, TTMP8, LSB, Width);
-  MI.eraseFromParent();
-  return true;
+
+  // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+  if (ST.hasArchitectedSGPRs()) {
+    LLT S32 = LLT::scalar(32);
+    auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
+    auto LSB = B.buildConstant(S32, 25);
+    auto Width = B.buildConstant(S32, 5);
+    B.buildUbfx(DstReg, TTMP8, LSB, Width);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // GFX942/GFX950 has wave_id_in_workgroup in ttmp11
+  if (ST.hasGFX940Insts()) {
+    B.buildCopy(DstReg, Register(AMDGPU::TTMP11));
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
 }
 
 bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80e985d823746..7e45b29a6c630 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9617,14 +9617,21 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
 }
 
 SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
-  // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
-  if (!Subtarget->hasArchitectedSGPRs())
-    return {};
   SDLoc SL(Op);
   MVT VT = MVT::i32;
-  SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
-  return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
-                     DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+  // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+  if (Subtarget->hasArchitectedSGPRs()) {
+    SDValue TTMP8 =
+        DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
+    return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
+                       DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+  }
+
+  // GFX942/GFX950 has wave_id_in_workgroup in ttmp11.
+  if (Subtarget->hasGFX940Insts())
+    return DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP11, VT);
+
+  return {};
 }
 
 SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
index c597693d5a5f9..6270e41f603f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
@@ -3,6 +3,10 @@
 ; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=1 -new-reg-bank-select < %s 2>&1 | FileCheck -check-prefix=GFX9-GISEL-ERR %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 -global-isel=0 < %s | FileCheck -check-prefixes=GFX942 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX942 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx950 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX950 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefix=GFX1200 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX1200 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefix=GFX1250 %s
@@ -19,6 +23,18 @@ define amdgpu_cs void @test_wave_id(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
 ;
+; GFX942-LABEL: test_wave_id:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    v_mov_b32_e32 v2, ttmp11
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off
+; GFX942-NEXT:    s_endpgm
+;
+; GFX950-LABEL: test_wave_id:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    v_mov_b32_e32 v2, ttmp11
+; GFX950-NEXT:    global_store_dword v[0:1], v2, off
+; GFX950-NEXT:    s_endpgm
+;
 ; GFX1200-LABEL: test_wave_id:
 ; GFX1200:       ; %bb.0:
 ; GFX1200-NEXT:    s_bfe_u32 s0, ttmp8, 0x50019
@@ -49,6 +65,22 @@ define amdgpu_gfx void @test_wave_id_callable(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX942-LABEL: test_wave_id_callable:
+; GFX942:       ; %bb.0:
+; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    v_mov_b32_e32 v2, ttmp11
+; GFX942-NEXT:    global_store_dword v[0:1], v2, off
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
+; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: test_wave_id_callable:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_mov_b32_e32 v2, ttmp11
+; GFX950-NEXT:    global_store_dword v[0:1], v2, off
+; GFX950-NEXT:    s_waitcnt vmcnt(0)
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX1200-LABEL: test_wave_id_callable:
 ; GFX1200:       ; %bb.0:
 ; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0

``````````

</details>


https://github.com/llvm/llvm-project/pull/164063


More information about the llvm-commits mailing list