[llvm] [AMDGPU] Architected SGPRs for GFX12 (PR #76140)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 21 01:52:59 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Enable architected SGPRs and add a new @<!-- -->llvm.amdgcn.wave.id intrinsic.
---
Patch is 23.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76140.diff
11 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp (+8-6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+6-1)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+11-6)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+9)
- (modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll (+44)
- (added) llvm/test/CodeGen/AMDGPU/wave-id-intrinsic.ll (+34)
- (modified) llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll (+111-68)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 51bd9b63c127ed..4eaa720aced5f9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2491,6 +2491,10 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmin_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmax_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
+// i32 @llvm.amdgcn.wave.id()
+def int_amdgcn_wave_id :
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 060fb66d38f7bc..ed2eb05d94a1d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1471,6 +1471,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureWavefrontSize32,
FeatureShaderCyclesRegister,
FeatureArchitectedFlatScratch,
+ FeatureArchitectedSGPRs,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureFlatAtomicFaddF32Inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index de25f9241a5036..54562cb79a9a31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -75,15 +75,14 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
+ << " WaveID: " << FI.second.WaveID
<< " LDSKernelId: " << FI.second.LDSKernelId
<< " PrivateSegmentWaveByteOffset: "
- << FI.second.PrivateSegmentWaveByteOffset
+ << FI.second.PrivateSegmentWaveByteOffset
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
- << " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
- << " WorkItemIDX " << FI.second.WorkItemIDX
- << " WorkItemIDY " << FI.second.WorkItemIDY
- << " WorkItemIDZ " << FI.second.WorkItemIDZ
- << '\n';
+ << " ImplicitArgPtr: " << FI.second.ImplicitArgPtr << " WorkItemIDX "
+ << FI.second.WorkItemIDX << " WorkItemIDY " << FI.second.WorkItemIDY
+ << " WorkItemIDZ " << FI.second.WorkItemIDZ << '\n';
}
}
@@ -108,6 +107,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::WAVE_ID:
+ return std::tuple(WaveID ? &WaveID : nullptr, &AMDGPU::SGPR_32RegClass,
+ LLT::scalar(32));
case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 42b33c50d9f8c4..d3e8394c4a7d46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -111,6 +111,7 @@ struct AMDGPUFunctionArgInfo {
WORKGROUP_ID_X = 10,
WORKGROUP_ID_Y = 11,
WORKGROUP_ID_Z = 12,
+ WAVE_ID = 13,
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
@@ -141,6 +142,7 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor WorkGroupIDY;
ArgDescriptor WorkGroupIDZ;
ArgDescriptor WorkGroupInfo;
+ ArgDescriptor WaveID;
ArgDescriptor PrivateSegmentWaveByteOffset;
// Pointer with offset from kernargsegmentptr to where special ABI arguments
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fbee2888945185..ff029d6dc09a4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6922,6 +6922,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_workgroup_id_z:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_wave_id:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WAVE_ID);
case Intrinsic::amdgcn_lds_kernel_id:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4f4bc45e49b43e..213e35704f7407 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2431,6 +2431,9 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(Reg);
}
+ if (HasArchitectedSGPRs && Info.hasWaveID())
+ CCInfo.AllocateReg(Info.addWaveID());
+
if (Info.hasWorkGroupInfo()) {
Register Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
@@ -2635,7 +2638,7 @@ SDValue SITargetLowering::LowerFormalArguments(
assert(!UserSGPRInfo.hasFlatScratchInit());
if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ());
+ !Info->hasWorkGroupIDZ() && !Info->hasWaveID());
}
if (CallConv == CallingConv::AMDGPU_PS) {
@@ -7775,6 +7778,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_wave_id:
+ return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WAVE_ID);
case Intrinsic::amdgcn_lds_kernel_id: {
if (MFI->isEntryFunction())
return getLDSKernelId(DAG, DL);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e8142244b7db69..84e33d2cd59592 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -39,10 +39,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
: AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
- WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
- PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
- WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
- GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
+ WorkGroupIDZ(false), WorkGroupInfo(false), WaveID(false),
+ LDSKernelId(false), PrivateSegmentWaveByteOffset(false),
+ WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false),
+ ImplicitArgPtr(false), GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
@@ -107,8 +107,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
- if (!AMDGPU::isGraphics(CC) ||
- (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
+ bool HasArchitectedSGPRs =
+ CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs();
+ if (!AMDGPU::isGraphics(CC) || HasArchitectedSGPRs) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
@@ -135,6 +136,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
LDSKernelId = true;
}
+ // For GFX12+.
+ if (HasArchitectedSGPRs)
+ WaveID = true;
+
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dc63ae44c528db..1c93514e06cec5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -447,6 +447,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
bool WorkGroupIDY : 1;
bool WorkGroupIDZ : 1;
bool WorkGroupInfo : 1;
+ bool WaveID : 1;
bool LDSKernelId : 1;
bool PrivateSegmentWaveByteOffset : 1;
@@ -782,6 +783,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
return ArgInfo.WorkGroupInfo.getRegister();
}
+ // Supported for GFX12+.
+ Register addWaveID() {
+ ArgInfo.WaveID = ArgDescriptor::createRegister(AMDGPU::TTMP8, 0x1f << 25);
+ return ArgInfo.WaveID.getRegister();
+ }
+
+ bool hasWaveID() const { return WaveID; }
+
bool hasLDSKernelId() const { return LDSKernelId; }
// Add special VGPR inputs
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
index c732ff70942550..a9fe5cae38a1ea 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX12-GISEL %s
define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
@@ -23,6 +25,30 @@ define amdgpu_cs void @_amdgpu_cs_main() {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
.entry:
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -68,6 +94,24 @@ define amdgpu_cs void @caller() {
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-SDAG-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-SDAG-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-GISEL-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-GISEL-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
call amdgpu_gfx void @callee(i32 %idx)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/wave-id-intrinsic.ll b/llvm/test/CodeGen/AMDGPU/wave-id-intrinsic.ll
new file mode 100644
index 00000000000000..35367bf52789c4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wave-id-intrinsic.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+
+define amdgpu_cs void @test_wave_id(ptr addrspace(1) %out) {
+; GFX9-LABEL: test_wave_id:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
+; GFX9-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-NEXT: global_store_dword v[0:1], v2, off
+; GFX9-NEXT: s_endpgm
+;
+; GFX12-LABEL: test_wave_id:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_bfe_u32 s0, ttmp8, 0x50019
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+ %waveid = call i32 @llvm.amdgcn.wave.id()
+ store i32 %waveid, ptr addrspace(1) %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.wave.id()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12-GISEL: {{.*}}
+; GFX12-SDAG: {{.*}}
+; GFX9-GISEL: {{.*}}
+; GFX9-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index c492b54759d82d..d4e9dcab3f03ff 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,25 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel --verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
-; GCN-SDAG-LABEL: workgroup_id_x:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: workgroup_id_x:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_x:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: workgroup_id_x:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_x:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-SDAG-NEXT: s_nop 0
+; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_x:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-GISEL-NEXT: s_nop 0
+; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1) %ptrx
@@ -27,27 +49,29 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
}
define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
-; GCN-SDAG-LABEL: workgroup_id_xy:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-SDAG-NEXT: s_endpgm
+; GFX9-LABEL: workgroup_id_xy:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
+; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT: s_endpgm
;
-; GCN-GISEL-LABEL: workgroup_id_xy:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9
-; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7
-; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
-; GCN-GISEL-NEXT: s_endpgm
+; GFX12-LABEL: workgroup_id_xy:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
+; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
+; GFX12-NEXT: s_waitcnt lgkmcnt(0)
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %idx, ptr addrspace(1...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76140
More information about the llvm-commits
mailing list