[llvm] lower waveid on GFX9 (PR #165332)

Zeng Wu via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 29 01:00:17 PDT 2025


https://github.com/zwu-2025 updated https://github.com/llvm/llvm-project/pull/165332

>From 9010ac8fd29a1e7f5c55e62f0ae27704c3422df0 Mon Sep 17 00:00:00 2001
From: test <test at amd.com>
Date: Mon, 27 Oct 2025 00:00:29 -0500
Subject: [PATCH 1/2] waveid

---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |  3 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ded00b1274670..8974262df56f1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2839,6 +2839,9 @@ def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
 def int_amdgcn_pops_exiting_wave_id :
   DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
 
+// i32 @llvm.amdgcn.gfx9_wave_id(i32): one i32 operand, i32 result; lowered in SIISelLowering.cpp.
+def int_amdgcn_gfx9_wave_id : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16530087444d2..a671de619d426 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9874,6 +9874,20 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_wave_id:
     return lowerWaveID(DAG, Op);
+  case Intrinsic::amdgcn_gfx9_wave_id: {
+    MVT VT = MVT::i32;
+    auto UpperBound = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VT);
+
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), 0);
+    const ArgDescriptor Arg = MFI->getArgInfo().WorkItemIDX;
+    SDValue Val = loadInputValue(DAG, &AMDGPU::SGPR_32RegClass, MVT::i32,
+                                 SDLoc(DAG.getEntryNode()), Arg);
+    SDValue Bounded = DAG.getNode(ISD::AND, DL, VT, Val, UpperBound);
+    SDValue WaveFrontSize =  DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
+                           SDLoc(Op), MVT::i32);
+    return DAG.getNode(ISD::SDIV, DL, VT, Bounded, WaveFrontSize);
+  }
   case Intrinsic::amdgcn_lds_kernel_id: {
     if (MFI->isEntryFunction())
       return getLDSKernelId(DAG, DL);

>From 26a2e9e82bcac3139ab6ea4cf7486f0dfc8c19b0 Mon Sep 17 00:00:00 2001
From: test <test at amd.com>
Date: Wed, 29 Oct 2025 03:00:02 -0500
Subject: [PATCH 2/2] waveid

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a671de619d426..b6f7ddec127c6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -31,10 +31,12 @@
 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/SDPatternMatch.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -9876,17 +9878,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return lowerWaveID(DAG, Op);
   case Intrinsic::amdgcn_gfx9_wave_id: {
     MVT VT = MVT::i32;
-    auto UpperBound = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VT);
+    auto Ratio = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VT);
 
-    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-    unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), 0);
-    const ArgDescriptor Arg = MFI->getArgInfo().WorkItemIDX;
-    SDValue Val = loadInputValue(DAG, &AMDGPU::SGPR_32RegClass, MVT::i32,
-                                 SDLoc(DAG.getEntryNode()), Arg);
-    SDValue Bounded = DAG.getNode(ISD::AND, DL, VT, Val, UpperBound);
-    SDValue WaveFrontSize =  DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
-                           SDLoc(Op), MVT::i32);
-    return DAG.getNode(ISD::SDIV, DL, VT, Bounded, WaveFrontSize);
+    SDValue WorkGrpId = lowerWorkGroupId(DAG, *MFI, VT,
+                            AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
+
+    return DAG.getNode(ISD::MUL, DL, VT, WorkGrpId, Ratio);
   }
   case Intrinsic::amdgcn_lds_kernel_id: {
     if (MFI->isEntryFunction())



More information about the llvm-commits mailing list