[llvm] [AMDGPU] Support lowering of cluster-related intrinsics (PR #157978)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 12:18:56 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/157978
>From 955846b48590ba355c8553ceb603dc4cc8211573 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 10 Sep 2025 20:59:22 -0400
Subject: [PATCH 1/3] [AMDGPU] Support lowering of cluster-related intrinsics
Because much of the code is interconnected, this also changes how the workgroup ID is lowered.
Co-authored-by: Jay Foad <jay.foad at amd.com>
Co-authored-by: Ivan Kosarev <ivan.kosarev at amd.com>
---
llvm/docs/AMDGPUUsage.rst | 7 +
.../Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 8 +
.../Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 19 +-
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 221 ++-
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 8 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 211 ++-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 9 +
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 +-
.../Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 +
.../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 5 +
llvm/lib/Target/AMDGPU/SOPInstructions.td | 19 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 48 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 44 +
.../llvm.amdgcn.cluster.workgroup.id.ll | 1258 +++++++++++++++++
...vm.amdgcn.cluster.workgroup.max.flat.id.ll | 194 +++
.../llvm.amdgcn.cluster.workgroup.max.id.ll | 1077 ++++++++++++++
.../lower-work-group-id-intrinsics-hsa.ll | 2 +-
.../lower-work-group-id-intrinsics-opt.ll | 390 +++++
.../AMDGPU/lower-work-group-id-intrinsics.ll | 376 +++++
.../AMDGPU/reassoc-mul-add-1-to-mad.ll | 26 +-
.../AMDGPU/workgroup-id-in-arch-sgprs.ll | 216 ++-
21 files changed, 4100 insertions(+), 43 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 37563203f2f83..cef87e077cc5c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1812,6 +1812,13 @@ The AMDGPU backend supports the following LLVM IR attributes.
offset by one less than the number of dynamic VGPR blocks required
by the function encoded in bits 5..3.
+ "amdgpu-cluster-dims"="x,y,z" Specify the cluster workgroup dimensions. A value of "0,0,0" indicates that
+ clusters are disabled. A value of "1024,1024,1024" indicates that clusters are enabled,
+ but the dimensions cannot be determined at compile time. Any other value explicitly
+ specifies the cluster dimensions.
+
+ This is only relevant on targets with cluster support.
+
================================================ ==========================================================
Calling Conventions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d158f0f58d711..dda8033f47398 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -107,6 +107,14 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ return std::tuple(nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index e07d47381ecca..1064e57b9da9e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -111,18 +111,25 @@ struct AMDGPUFunctionArgInfo {
DISPATCH_ID = 4,
FLAT_SCRATCH_INIT = 5,
LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI
- WORKGROUP_ID_X = 10,
- WORKGROUP_ID_Y = 11,
- WORKGROUP_ID_Z = 12,
+ WORKGROUP_ID_X = 10, // Also used for cluster ID X.
+ WORKGROUP_ID_Y = 11, // Also used for cluster ID Y.
+ WORKGROUP_ID_Z = 12, // Also used for cluster ID Z.
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
PRIVATE_SEGMENT_SIZE = 17,
+ CLUSTER_WORKGROUP_ID_X = 21,
+ CLUSTER_WORKGROUP_ID_Y = 22,
+ CLUSTER_WORKGROUP_ID_Z = 23,
+ CLUSTER_WORKGROUP_MAX_ID_X = 24,
+ CLUSTER_WORKGROUP_MAX_ID_Y = 25,
+ CLUSTER_WORKGROUP_MAX_ID_Z = 26,
+ CLUSTER_WORKGROUP_MAX_FLAT_ID = 27,
// VGPRS:
- WORKITEM_ID_X = 18,
- WORKITEM_ID_Y = 19,
- WORKITEM_ID_Z = 20,
+ WORKITEM_ID_X = 28,
+ WORKITEM_ID_Y = 29,
+ WORKITEM_ID_Z = 30,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f18536cd4ab93..b5a41e3fbf8fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4452,6 +4452,74 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg,
}
}
+bool AMDGPULegalizerInfo::legalizeWorkGroupId(
+ MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!ST.hasClusters()) {
+ if (!loadInputValue(DstReg, B, WorkGroupIdPV))
+ return false;
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Clusters are supported. Return the global position in the grid. If clusters
+ // are enabled, WorkGroupIdPV returns the cluster ID, not the workgroup ID.
+
+ // WorkGroupIdXYZ = ClusterId == 0 ?
+ // ClusterIdXYZ :
+ // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const LLT S32 = LLT::scalar(32);
+ Register ClusterIdXYZ = MRI.createGenericVirtualRegister(S32);
+ Register ClusterMaxIdXYZ = MRI.createGenericVirtualRegister(S32);
+ Register ClusterWorkGroupIdXYZ = MRI.createGenericVirtualRegister(S32);
+ if (!loadInputValue(ClusterIdXYZ, B, WorkGroupIdPV) ||
+ !loadInputValue(ClusterWorkGroupIdXYZ, B, ClusterWorkGroupIdPV) ||
+ !loadInputValue(ClusterMaxIdXYZ, B, ClusterMaxIdPV))
+ return false;
+
+ auto One = B.buildConstant(S32, 1);
+ auto ClusterSizeXYZ = B.buildAdd(S32, ClusterMaxIdXYZ, One);
+ auto GlobalIdXYZ = B.buildAdd(S32, ClusterWorkGroupIdXYZ,
+ B.buildMul(S32, ClusterIdXYZ, ClusterSizeXYZ));
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+ switch (MFI->getClusterDims().getKind()) {
+ case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+ case AMDGPU::ClusterDimsAttr::Kind::VariableDims: {
+ B.buildCopy(DstReg, GlobalIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ case AMDGPU::ClusterDimsAttr::Kind::NoCluster: {
+ B.buildCopy(DstReg, ClusterIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+ using namespace AMDGPU::Hwreg;
+ unsigned ClusterIdField = HwregEncoding::encode(ID_IB_STS2, 6, 4);
+ Register ClusterId = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(ClusterId, &AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_GETREG_B32_const)
+ .addDef(ClusterId)
+ .addImm(ClusterIdField);
+ auto Zero = B.buildConstant(S32, 0);
+ auto NoClusters =
+ B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), ClusterId, Zero);
+ B.buildSelect(DstReg, NoClusters, ClusterIdXYZ, GlobalIdXYZ);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ llvm_unreachable("nothing should reach here");
+}
+
bool AMDGPULegalizerInfo::loadInputValue(
Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
@@ -4471,8 +4539,31 @@ bool AMDGPULegalizerInfo::loadInputValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ const ArgDescriptor ClusterWorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+ const ArgDescriptor ClusterWorkGroupIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+ const ArgDescriptor ClusterWorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+ const ArgDescriptor ClusterWorkGroupMaxIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+ const ArgDescriptor ClusterWorkGroupMaxFlatID =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+ auto LoadConstant = [&](unsigned N) {
+ B.buildConstant(DstReg, N);
+ return true;
+ };
+
if (ST.hasArchitectedSGPRs() &&
(AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+ AMDGPU::ClusterDimsAttr ClusterDims = MFI->getClusterDims();
+ bool HasFixedDims = ClusterDims.isFixedDims();
+
switch (ArgType) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Arg = &WorkGroupIDX;
@@ -4489,6 +4580,53 @@ bool AMDGPULegalizerInfo::loadInputValue(
ArgRC = &AMDGPU::SReg_32RegClass;
ArgTy = LLT::scalar(32);
break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDX;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDY;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+ return LoadConstant(0);
+ Arg = &ClusterWorkGroupIDZ;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[0] - 1);
+ Arg = &ClusterWorkGroupMaxIDX;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[1] - 1);
+ Arg = &ClusterWorkGroupMaxIDY;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[2] - 1);
+ Arg = &ClusterWorkGroupMaxIDZ;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ Arg = &ClusterWorkGroupMaxFlatID;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
default:
break;
}
@@ -4499,10 +4637,9 @@ bool AMDGPULegalizerInfo::loadInputValue(
if (!Arg) {
if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
- // The intrinsic may appear when we have a 0 sized kernarg segment, in which
- // case the pointer argument may be missing and we use null.
- B.buildConstant(DstReg, 0);
- return true;
+ // The intrinsic may appear when we have a 0 sized kernarg segment, in
+ // which case the pointer argument may be missing and we use null.
+ return LoadConstant(0);
}
// It's undefined behavior if a function marked with the amdgpu-no-*
@@ -7415,6 +7552,22 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI,
+ MachineIRBuilder &B,
+ AMDGPU::Hwreg::Id HwReg,
+ unsigned LowBit,
+ unsigned Width) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!MRI.getRegClassOrNull(DstReg))
+ MRI.setRegClass(DstReg, &AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_GETREG_B32_const)
+ .addDef(DstReg)
+ .addImm(AMDGPU::Hwreg::HwregEncoding::encode(HwReg, LowBit, Width));
+ MI.eraseFromParent();
+ return true;
+}
+
static constexpr unsigned FPEnvModeBitField =
AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23);
@@ -7577,14 +7730,64 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2,
AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
case Intrinsic::amdgcn_workgroup_id_x:
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
- return legalizePreloadedArgIntrin(MI, MRI, B,
+ return legalizeWorkGroupId(
+ MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_id_x:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ case Intrinsic::amdgcn_cluster_id_y:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ case Intrinsic::amdgcn_cluster_id_z:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_id_x:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
+ case Intrinsic::amdgcn_cluster_workgroup_id_y:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
+ case Intrinsic::amdgcn_cluster_workgroup_id_z:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+ return AMDGPU::isGFX1250(ST) &&
+ legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z);
+ case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+ return ST.hasGFX1250Insts() &&
+ legalizePreloadedArgIntrin(
+ MI, MRI, B,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID);
case Intrinsic::amdgcn_wave_id:
return legalizeWaveID(MI, B);
case Intrinsic::amdgcn_lds_kernel_id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1f4e02b0d600a..cd44a9ba0807c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -114,6 +114,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
void buildLoadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg,
const TargetRegisterClass *ArgRC, LLT ArgTy) const;
+ bool legalizeWorkGroupId(
+ MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
@@ -218,6 +223,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeConstHwRegRead(MachineInstr &MI, MachineIRBuilder &B,
+ AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+ unsigned Width) const;
bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cb3e544449bbf..5c4538c0cc56e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2408,6 +2408,53 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG,
return ArgValue;
}
+SDValue SITargetLowering::lowerWorkGroupId(
+ SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+ AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+ if (!Subtarget->hasClusters())
+ return getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+
+ // Clusters are supported. Return the global position in the grid. If clusters
+ // are enabled, WorkGroupIdPV returns the cluster ID, not the workgroup ID.
+
+ // WorkGroupIdXYZ = ClusterId == 0 ?
+ // ClusterIdXYZ :
+ // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+ SDValue ClusterIdXYZ = getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+ SDLoc SL(ClusterIdXYZ);
+ SDValue ClusterMaxIdXYZ = getPreloadedValue(DAG, MFI, VT, ClusterMaxIdPV);
+ SDValue One = DAG.getConstant(1, SL, VT);
+ SDValue ClusterSizeXYZ = DAG.getNode(ISD::ADD, SL, VT, ClusterMaxIdXYZ, One);
+ SDValue ClusterWorkGroupIdXYZ =
+ getPreloadedValue(DAG, MFI, VT, ClusterWorkGroupIdPV);
+ SDValue GlobalIdXYZ =
+ DAG.getNode(ISD::ADD, SL, VT, ClusterWorkGroupIdXYZ,
+ DAG.getNode(ISD::MUL, SL, VT, ClusterIdXYZ, ClusterSizeXYZ));
+
+ switch (MFI.getClusterDims().getKind()) {
+ case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+ case AMDGPU::ClusterDimsAttr::Kind::VariableDims:
+ return GlobalIdXYZ;
+ case AMDGPU::ClusterDimsAttr::Kind::NoCluster:
+ return ClusterIdXYZ;
+ case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+ using namespace AMDGPU::Hwreg;
+ SDValue ClusterIdField =
+ DAG.getTargetConstant(HwregEncoding::encode(ID_IB_STS2, 6, 4), SL, VT);
+ SDNode *GetReg =
+ DAG.getMachineNode(AMDGPU::S_GETREG_B32_const, SL, VT, ClusterIdField);
+ SDValue ClusterId(GetReg, 0);
+ SDValue Zero = DAG.getConstant(0, SL, VT);
+ return DAG.getNode(ISD::SELECT_CC, SL, VT, ClusterId, Zero, ClusterIdXYZ,
+ GlobalIdXYZ, DAG.getCondCode(ISD::SETEQ));
+ }
+ }
+
+ llvm_unreachable("nothing should reach here");
+}
+
SDValue SITargetLowering::getPreloadedValue(
SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
@@ -2426,9 +2473,30 @@ SDValue SITargetLowering::getPreloadedValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ const ArgDescriptor ClusterWorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+ const ArgDescriptor ClusterWorkGroupIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+ const ArgDescriptor ClusterWorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+ const ArgDescriptor ClusterWorkGroupMaxIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDY =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+ const ArgDescriptor ClusterWorkGroupMaxIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+ const ArgDescriptor ClusterWorkGroupMaxFlatID =
+ ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+ auto LoadConstant = [&](unsigned N) {
+ return DAG.getConstant(N, SDLoc(), VT);
+ };
+
if (Subtarget->hasArchitectedSGPRs() &&
- (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx ||
- CC == CallingConv::AMDGPU_Gfx_WholeWave)) {
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+ AMDGPU::ClusterDimsAttr ClusterDims = MFI.getClusterDims();
+ bool HasFixedDims = ClusterDims.isFixedDims();
+
switch (PVID) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Reg = &WorkGroupIDX;
@@ -2445,6 +2513,53 @@ SDValue SITargetLowering::getPreloadedValue(
RC = &AMDGPU::SReg_32RegClass;
Ty = LLT::scalar(32);
break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+ if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+ if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+ if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+ return LoadConstant(0);
+ Reg = &ClusterWorkGroupIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[0] - 1);
+ Reg = &ClusterWorkGroupMaxIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[1] - 1);
+ Reg = &ClusterWorkGroupMaxIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+ if (HasFixedDims)
+ return LoadConstant(ClusterDims.getDims()[2] - 1);
+ Reg = &ClusterWorkGroupMaxIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+ Reg = &ClusterWorkGroupMaxFlatID;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
default:
break;
}
@@ -9528,6 +9643,19 @@ SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
}
+SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+ AMDGPU::Hwreg::Id HwReg,
+ unsigned LowBit,
+ unsigned Width) const {
+ SDLoc SL(Op);
+ using namespace AMDGPU::Hwreg;
+ return {DAG.getMachineNode(
+ AMDGPU::S_GETREG_B32_const, SL, MVT::i32,
+ DAG.getTargetConstant(HwregEncoding::encode(HwReg, LowBit, Width),
+ SL, MVT::i32)),
+ 0};
+}
+
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
@@ -9674,14 +9802,81 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
- return getPreloadedValue(DAG, *MFI, VT,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ return lowerWorkGroupId(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_cluster_id_x:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_id_y:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_id_z:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_x:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_y:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_id_z:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+ return AMDGPU::isGFX1250(*Subtarget)
+ ? lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4)
+ : SDValue();
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z)
+ : DAG.getUNDEF(VT);
+ case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+ return Subtarget->hasGFX1250Insts()
+ ? getPreloadedValue(
+ DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID)
+ : DAG.getUNDEF(VT);
case Intrinsic::amdgcn_wave_id:
return lowerWaveID(DAG, Op);
case Intrinsic::amdgcn_lds_kernel_id: {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4886fcf9fd012..48f729c260c69 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -61,6 +62,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
const ISD::InputArg &Arg) const;
+ SDValue lowerWorkGroupId(
+ SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+ AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
SDValue getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
@@ -81,6 +87,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
unsigned NewOpcode) const;
SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
+ SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+ AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+ unsigned Width) const;
SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
const ArgDescriptor &ArgDesc) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f7dde2b90b68e..a80bd81c7ea11 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -926,7 +926,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
- Opcode == AMDGPU::S_GETREG_B32;
+ Opcode == AMDGPU::S_GETREG_B32 ||
+ Opcode == AMDGPU::S_GETREG_B32_const;
}
/// \returns true if this is an s_store_dword* instruction. This is more
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 54426d33d3473..1f11be475e9f8 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -195,6 +195,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
VGPRForAGPRCopy =
AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
}
+
+ ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
}
MachineFunctionInfo *SIMachineFunctionInfo::clone(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ca8f8033a2d54..45606153db58e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// Default/requested number of work groups for the function.
SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};
+ // Requested cluster dimensions.
+ AMDGPU::ClusterDimsAttr ClusterDims;
+
private:
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
@@ -1207,6 +1210,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
+
+ AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index fe94887cdff98..296ce5a46287c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1127,19 +1127,26 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
-// This is hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16-bits.
-// FIXME: Should have separate pseudos for known may read MODE and
-// only read MODE.
-def S_GETREG_B32 : SOPK_Pseudo <
+class S_GETREG_B32_Pseudo<list<dag> pattern=[]> : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
- "$sdst, $simm16",
- [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
+ "$sdst, $simm16", pattern>;
+
+// This is hasSideEffects to allow its use in readcyclecounter selection.
+// FIXME: Should have separate pseudos for known may read MODE and
+// only read MODE.
+def S_GETREG_B32 : S_GETREG_B32_Pseudo<
+ [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
let hasSideEffects = 1;
let Uses = [MODE];
}
+// A version of the pseudo for reading hardware register fields that are
+// known to remain the same during the course of the run. Has no side
+// effects and doesn't read MODE.
+def S_GETREG_B32_const : S_GETREG_B32_Pseudo;
+
let Defs = [MODE], Uses = [MODE] in {
// FIXME: Need to truncate immediate to 16-bits.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 40da4f96aefdb..faae1fee342af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3533,6 +3533,54 @@ bool isPackedFP32Inst(unsigned Opc) {
}
}
+const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
+ assert(isFixedDims() && "expect kind to be FixedDims");
+ return Dims; // Only meaningful for Kind::FixedDims; see the assert above.
+}
+
+std::string ClusterDimsAttr::to_string() const {
+  // Renders three components as the comma-separated triple "X,Y,Z".
+  auto FormatTriple = [](unsigned X, unsigned Y, unsigned Z) -> std::string {
+    SmallString<10> Storage;
+    raw_svector_ostream Stream(Storage);
+    Stream << X << ',' << Y << ',' << Z;
+    return Storage.c_str();
+  };
+
+  switch (getKind()) {
+  case Kind::Unknown:
+    // Nothing is known about the cluster dims, so there is nothing to print.
+    return "";
+  case Kind::NoCluster:
+    return FormatTriple(EncoNoCluster, EncoNoCluster, EncoNoCluster);
+  case Kind::VariableDims:
+    return FormatTriple(EncoVariableDims, EncoVariableDims, EncoVariableDims);
+  case Kind::FixedDims:
+    return FormatTriple(Dims[0], Dims[1], Dims[2]);
+  }
+  llvm_unreachable("Unknown ClusterDimsAttr kind");
+}
+
+ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
+  std::optional<SmallVector<unsigned>> Parsed =
+      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
+
+  // No usable attribute value: nothing is known about the cluster dims.
+  if (!Parsed)
+    return ClusterDimsAttr(Kind::Unknown);
+
+  // True when every component equals the given reserved encoding value.
+  auto AllEqualTo = [&Parsed](unsigned Enc) {
+    return all_of(*Parsed, [Enc](unsigned V) { return V == Enc; });
+  };
+
+  if (AllEqualTo(EncoNoCluster))
+    return ClusterDimsAttr(Kind::NoCluster);
+  if (AllEqualTo(EncoVariableDims))
+    return ClusterDimsAttr(Kind::VariableDims);
+
+  // Any other triple is taken verbatim as the fixed cluster dimensions.
+  ClusterDimsAttr Result(Kind::FixedDims);
+  Result.Dims = {(*Parsed)[0], (*Parsed)[1], (*Parsed)[2]};
+  return Result;
+}
+
} // namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3fcd16f9290b1..3f8d43db5a48c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1813,6 +1813,50 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
+/// Cluster dimensions of a function as described by its
+/// "amdgpu-cluster-dims" attribute.
+///
+/// ClusterDimsAttr::get() classifies the attribute value: 0,0,0 is
+/// Kind::NoCluster, 1024,1024,1024 is Kind::VariableDims, any other triple is
+/// Kind::FixedDims, and Kind::Unknown is used when no value is available.
+class ClusterDimsAttr {
+public:
+  enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
+
+  /// Constructs an attribute of Kind::Unknown with all dims set to zero.
+  ClusterDimsAttr() = default;
+
+  Kind getKind() const { return AttrKind; }
+
+  bool isUnknown() const { return getKind() == Kind::Unknown; }
+
+  bool isNoCluster() const { return getKind() == Kind::NoCluster; }
+
+  bool isFixedDims() const { return getKind() == Kind::FixedDims; }
+
+  bool isVariableDims() const { return getKind() == Kind::VariableDims; }
+
+  /// Misspelled predecessor of isVariableDims(), kept so that existing callers
+  /// keep compiling. Prefer isVariableDims() in new code.
+  bool isVariableedDims() const { return isVariableDims(); }
+
+  // Each setter replaces the whole object, which also resets the stored dims
+  // to zero.
+  void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }
+
+  void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }
+
+  void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }
+
+  /// \returns the dims stored. Note that this function can only be called if
+  /// the kind is Kind::FixedDims.
+  const std::array<unsigned, 3> &getDims() const;
+
+  /// Two attributes are equal when both the kind and the stored dims match.
+  bool operator==(const ClusterDimsAttr &RHS) const {
+    return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
+  }
+
+  /// \returns the attribute as a comma-separated triple, or an empty string
+  /// if the kind is Kind::Unknown.
+  std::string to_string() const;
+
+  /// Builds the attribute from the "amdgpu-cluster-dims" attribute of \p F.
+  static ClusterDimsAttr get(const Function &F);
+
+private:
+  /// Reserved component values: all components equal to EncoNoCluster means
+  /// no cluster, all equal to EncoVariableDims means variable dims.
+  enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
+
+  explicit ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
+
+  std::array<unsigned, 3> Dims = {0, 0, 0};
+
+  Kind AttrKind = Kind::Unknown;
+};
+
} // end namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
new file mode 100644
index 0000000000000..aa3b7b3606fd8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
@@ -0,0 +1,1258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.z() #0
+
+define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1,2,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) {
+; CHECK-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.flat.id()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,2,1" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
new file mode 100644
index 0000000000000..afe37e371fbc3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-MESA3D %s
+; Test matrix above: gfx1250 with and without -global-isel, for the amdgcn and amdgcn-unknown-mesa3d triples, one check prefix per configuration.
+declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() #0
+; Stores the i32 returned by llvm.amdgcn.cluster.workgroup.max.flat.id() to %out. First group: amdgcn triple without -global-isel; the value comes from s_bfe_u32 on ttmp6 with operand 0x40018 (presumably a 4-bit field at bit 24 - confirm).
+define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple without -global-isel: an .amd_kernel_code_t block precedes the code and the pointer load uses offset 0x0 rather than 0x24.
+; CHECK-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+; amdgcn triple with -global-isel: same instructions, but the zero lands in v1 and the extracted value in v0.
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple with -global-isel: kernel code header followed by the -global-isel register assignment.
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+; #0 is used by the intrinsic declaration and #1 by the kernel. NOTE(review): the flag below is spelled amdgpu_code_object_version; newer LLVM appears to read amdhsa_code_object_version - confirm it is still honoured.
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
new file mode 100644
index 0000000000000..7ea4fa5373e57
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
@@ -0,0 +1,1077 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-MESA3D %s
+; Test matrix above: gfx1250 with and without -global-isel, for the amdgcn and amdgcn-unknown-mesa3d triples, one check prefix per configuration.
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.z() #0
+; Stores the i32 returned by llvm.amdgcn.cluster.workgroup.max.id.x() to %out. First group: amdgcn triple without -global-isel; the value comes from s_bfe_u32 on ttmp6 with operand 0x4000c (presumably a 4-bit field at bit 12 - confirm).
+define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple without -global-isel: an .amd_kernel_code_t block precedes the code and the pointer load uses offset 0x0 rather than 0x24.
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+; amdgcn triple with -global-isel: same instructions, but the zero lands in v1 and the extracted value in v0.
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple with -global-isel: kernel code header followed by the -global-isel register assignment.
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+; Same max.id.x query on a kernel carrying "amdgpu-cluster-dims"="5,6,7": every configuration is expected to store the constant 4 with no read of ttmp6 (presumably 5 - 1, the largest x index - confirm).
+define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple without -global-isel: an .amd_kernel_code_t block precedes the code and the pointer load uses offset 0x0 rather than 0x24.
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+; amdgcn triple with -global-isel: the constant 4 is placed in v0 and the zero in v1.
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+; amdgcn-unknown-mesa3d triple with -global-isel: kernel code header followed by the -global-isel register assignment.
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() ; this kernel sets "amdgpu-cluster-dims"="5,6,7"; all four check groups above expect the immediate 5 to be moved into a VGPR and stored, with no read of ttmp6
+ store i32 %id, ptr addrspace(1) %out ; write the intrinsic's result through the %out kernel argument
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() ; no "amdgpu-cluster-dims" attribute on this kernel; all four check groups above expect the value to be extracted from ttmp6 by s_bfe_u32 with immediate 0x40014
+ store i32 %id, ptr addrspace(1) %out ; write the intrinsic's result through the %out kernel argument
+ ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-UNKNOWN: ; %bb.0:
+; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-MESA3D: .amd_kernel_code_t
+; CHECK-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT: priority = 0
+; CHECK-MESA3D-NEXT: float_mode = 240
+; CHECK-MESA3D-NEXT: priv = 0
+; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT: debug_mode = 0
+; CHECK-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-MESA3D-NEXT: enable_exception = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT: private_element_size = 1
+; CHECK-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-MESA3D-NEXT: wavefront_size = 5
+; CHECK-MESA3D-NEXT: call_convention = -1
+; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT: ; %bb.0:
+; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT: s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-UNKNOWN: ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT: s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-MESA3D: .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: priority = 0
+; CHECK-G-MESA3D-NEXT: float_mode = 240
+; CHECK-G-MESA3D-NEXT: priv = 0
+; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT: debug_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT: enable_exception = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT: private_element_size = 1
+; CHECK-G-MESA3D-NEXT: is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT: wavefront_size = 5
+; CHECK-G-MESA3D-NEXT: call_convention = -1
+; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT: ; %bb.0:
+; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() ; this kernel sets "amdgpu-cluster-dims"="5,6,7"; all four check groups above expect the immediate 6 to be moved into a VGPR and stored, with no read of ttmp6
+ store i32 %id, ptr addrspace(1) %out ; write the intrinsic's result through the %out kernel argument
+ ret void
+}
+
+attributes #0 = { nounwind readnone } ; NOTE(review): no function in the visible part of this test references #0 - confirm it is used earlier in the file
+attributes #1 = { nounwind } ; attribute group referenced by test_workgroup_max_id_z above
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} ; module flag named "amdgpu_code_object_version" with value 400
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 2554d99def57f..169a84ff1f86b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -297,6 +297,6 @@ declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
+attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX9ARCH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
new file mode 100644
index 0000000000000..69439d49e588f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, ttmp9
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, ttmp9
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.y()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 15
+; GFX1250-SDAG-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_and_b32 s0, s0, 0x1fffe
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, s0, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %id = call i32 @llvm.amdgcn.workgroup.id.z()
+ store i32 %id, ptr addrspace(1) %out
+ ret void
+}
+
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
new file mode 100644
index 0000000000000..497241cff392d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs < %s | FileCheck -check-prefix=GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel < %s | FileCheck -check-prefix=GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define amdgpu_cs void @_amdgpu_cs_main() {
+; GFX9-SDAG-LABEL: _amdgpu_cs_main:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: _amdgpu_cs_main:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: _amdgpu_cs_main:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s3, s2
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_lshr_b32 s5, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, s5, s4
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s5, s4
+; GFX1250-SDAG-NEXT: s_cselect_b32 s1, ttmp9, s1
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, s3, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: _amdgpu_cs_main:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s3, s1
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s1
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s1, s3, s4
+; GFX1250-GISEL-NEXT: s_bfe_u32 s3, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s4, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s3, s4, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, s3
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, s4, s5
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @workgroup_id_no_clusters() "amdgpu-cluster-dims"="0,0,0" {
+; GFX9-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" {
+; GFX9-SDAG-LABEL: workgroup_id_optimized:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_optimized:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_optimized:
+; GFX12-SDAG: ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_optimized:
+; GFX12-GISEL: ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_optimized:
+; GFX1250-SDAG: ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 14
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_and_b32 s0, s2, 0x3fffc
+; GFX1250-SDAG-NEXT: s_and_b32 s2, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_bfe_u32 s3, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_mul_i32 s2, s2, 3
+; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_optimized:
+; GFX1250-GISEL: ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, 3
+; GFX1250-GISEL-NEXT: s_lshr_b32 s3, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s2, s1
+; GFX1250-GISEL-NEXT: s_lshl2_add_u32 s2, s3, s4
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT: s_endpgm
+.entry:
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+ %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+ %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+ %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+ %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @caller() {
+; GFX9-SDAG-LABEL: caller:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b32 s8, s0
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, callee at abs32@hi
+; GFX9-SDAG-NEXT: s_mov_b32 s4, callee at abs32@lo
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0
+; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: caller:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b32 s8, s0
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-GISEL-NEXT: s_mov_b32 s4, callee at abs32@lo
+; GFX9-GISEL-NEXT: s_mov_b32 s5, callee at abs32@hi
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0
+; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-SDAG-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-SDAG-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX12-GISEL-NEXT: s_mov_b32 s0, callee at abs32@lo
+; GFX12-GISEL-NEXT: s_mov_b32 s1, callee at abs32@hi
+; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
+; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: caller:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT: s_mov_b32 s32, 0
+; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s1
+; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], callee at abs64
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: caller:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: s_mov_b32 s32, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], callee at abs64
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+ call amdgpu_gfx void @callee(i32 %idx)
+ ret void
+}
+
+declare amdgpu_gfx void @callee(i32)
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 25609e881254e..b2bcb74e4184f 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -4089,32 +4089,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %
; GFX1250-NEXT: s_add_co_i32 s0, s10, 1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15
+; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c
; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX1250-NEXT: s_add_co_i32 s3, s3, 1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3
; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_add_co_i32 s4, s4, s3
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX1250-NEXT: s_cmp_eq_u32 s5, 0
; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2
-; GFX1250-NEXT: v_mad_u32 v0, ttmp9, s2, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0
; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3
; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index 7a64e55abb8d3..afca83a7e1c36 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
;
@@ -15,6 +17,50 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_x:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_x:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_mul_i32 s2, ttmp9, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s2
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s4, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_x:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, 1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s2, ttmp9, s2
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, s2
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s4, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_x:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
@@ -41,6 +87,74 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
; GFX9-NEXT: global_store_dword v1, v2, s[2:3]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_xy:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT: s_and_b32 s4, ttmp7, 0xffff
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: v_mov_b32_e32 v2, s4
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: s_clause 0x1
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xy:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_and_b32 s4, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s7, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_mul_i32 s5, s4, s6
+; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, s5
+; GFX1250-SDAG-NEXT: s_and_b32 s5, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT: s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s5, s5, s7
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s8, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s5, ttmp9, s5
+; GFX1250-SDAG-NEXT: s_cselect_b32 s4, s4, s6
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s5
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xy:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s6, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s6, s6, 1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_and_b32 s4, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_mul_i32 s5, ttmp9, s6
+; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s5
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s4, ttmp9, s4
+; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s7, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s8, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s5, s7, s5
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-GISEL-NEXT: s_add_co_i32 s8, s8, s5
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s4, s7, s8
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_xy:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
@@ -77,6 +191,99 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
; GFX9-NEXT: s_endpgm
;
+; GFX1200-LABEL: workgroup_id_xyz:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_clause 0x1
+; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1200-NEXT: s_and_b32 s6, ttmp7, 0xffff
+; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT: s_lshr_b32 s7, ttmp7, 16
+; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1200-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: s_clause 0x2
+; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT: global_store_b32 v1, v3, s[4:5]
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xyz:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT: s_lshr_b32 s6, ttmp7, 16
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s0, 1
+; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, s6, s7
+; GFX1250-SDAG-NEXT: s_bfe_u32 s8, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT: s_and_b32 s10, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, 1
+; GFX1250-SDAG-NEXT: s_bfe_u32 s11, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT: s_add_co_i32 s8, s8, s7
+; GFX1250-SDAG-NEXT: s_mul_i32 s7, s10, s9
+; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT: s_add_co_i32 s11, s11, 1
+; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s7
+; GFX1250-SDAG-NEXT: s_and_b32 s7, ttmp6, 15
+; GFX1250-SDAG-NEXT: s_mul_i32 s11, ttmp9, s11
+; GFX1250-SDAG-NEXT: s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, s11
+; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s12, 0
+; GFX1250-SDAG-NEXT: s_cselect_b32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7
+; GFX1250-SDAG-NEXT: s_cselect_b32 s7, s10, s9
+; GFX1250-SDAG-NEXT: s_cselect_b32 s6, s6, s8
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: s_clause 0x2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[4:5]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xyz:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s7, ttmp9, s1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT: s_and_b32 s8, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT: s_mul_i32 s10, s8, s0
+; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, s10
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s7
+; GFX1250-GISEL-NEXT: s_cselect_b32 s8, s8, s9
+; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT: s_lshr_b32 s10, ttmp7, 16
+; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, 1
+; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT: s_mul_i32 s9, s10, s9
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s9
+; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT: s_cselect_b32 s6, s10, s11
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: s_clause 0x2
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[4:5]
+; GFX1250-GISEL-NEXT: s_endpgm
; GFX12-LABEL: workgroup_id_xyz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_clause 0x1
@@ -107,7 +314,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
+; GFX1250: {{.*}}
; GFX9-GISEL: {{.*}}
; GFX9-SDAG: {{.*}}
>From 6c2e11083fdb5afd7e450a925d93a91f3260cfc2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 11 Sep 2025 15:14:22 -0400
Subject: [PATCH 2/3] Address review comments: check hasClusters() and return poison instead of undef
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 42 +++++++++++------------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5c4538c0cc56e..dfb6fd1a7b7a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9817,66 +9817,66 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
case Intrinsic::amdgcn_cluster_id_x:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_id_y:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_id_z:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_id_x:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_id_y:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_id_z:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_flat_id:
- return AMDGPU::isGFX1250(*Subtarget)
+ return Subtarget->hasClusters()
? lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4)
: SDValue();
case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
- return Subtarget->hasGFX1250Insts()
+ return Subtarget->hasClusters()
? getPreloadedValue(
DAG, *MFI, VT,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID)
- : DAG.getUNDEF(VT);
+ : DAG.getPOISON(VT);
case Intrinsic::amdgcn_wave_id:
return lowerWaveID(DAG, Op);
case Intrinsic::amdgcn_lds_kernel_id: {
>From aa5ace8976a0a5bcb671c36f4e73f297a4e9ffe4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 11 Sep 2025 15:18:36 -0400
Subject: [PATCH 3/3] Use hasClusters() in AMDGPULegalizerInfo as well
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b5a41e3fbf8fb..b6b82cadcc25a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7745,46 +7745,46 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
case Intrinsic::amdgcn_cluster_id_x:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_cluster_id_y:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_cluster_id_z:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_cluster_workgroup_id_x:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
case Intrinsic::amdgcn_cluster_workgroup_id_y:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
case Intrinsic::amdgcn_cluster_workgroup_id_z:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
case Intrinsic::amdgcn_cluster_workgroup_flat_id:
return AMDGPU::isGFX1250(ST) &&
legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4);
case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X);
case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y);
case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z);
case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
- return ST.hasGFX1250Insts() &&
+ return ST.hasClusters() &&
legalizePreloadedArgIntrin(
MI, MRI, B,
AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID);
More information about the llvm-commits mailing list