[llvm] [AMDGPU] Support lowering of cluster related intrinsics (PR #157978)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 11 12:18:56 PDT 2025


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/157978

>From 955846b48590ba355c8553ceb603dc4cc8211573 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 10 Sep 2025 20:59:22 -0400
Subject: [PATCH 1/3] [AMDGPU] Support lowering of cluster related intrinsics

Since much of the code is interconnected, this also changes how the workgroup ID is lowered.

Co-authored-by: Jay Foad <jay.foad at amd.com>
Co-authored-by: Ivan Kosarev <ivan.kosarev at amd.com>
---
 llvm/docs/AMDGPUUsage.rst                     |    7 +
 .../Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp |    8 +
 .../Target/AMDGPU/AMDGPUArgumentUsageInfo.h   |   19 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  221 ++-
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h  |    8 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  211 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |    9 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |    3 +-
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |    2 +
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |    5 +
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |   19 +-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   48 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   44 +
 .../llvm.amdgcn.cluster.workgroup.id.ll       | 1258 +++++++++++++++++
 ...vm.amdgcn.cluster.workgroup.max.flat.id.ll |  194 +++
 .../llvm.amdgcn.cluster.workgroup.max.id.ll   | 1077 ++++++++++++++
 .../lower-work-group-id-intrinsics-hsa.ll     |    2 +-
 .../lower-work-group-id-intrinsics-opt.ll     |  390 +++++
 .../AMDGPU/lower-work-group-id-intrinsics.ll  |  376 +++++
 .../AMDGPU/reassoc-mul-add-1-to-mad.ll        |   26 +-
 .../AMDGPU/workgroup-id-in-arch-sgprs.ll      |  216 ++-
 21 files changed, 4100 insertions(+), 43 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 37563203f2f83..cef87e077cc5c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1812,6 +1812,13 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                                       offset by one less than the number of dynamic VGPR blocks required
                                                       by the function encoded in bits 5..3.
 
+     "amdgpu-cluster-dims"="x,y,z"                    Specify the cluster workgroup dimensions. A value of "0,0,0" indicates that
+                                                      clusters are disabled. A value of "1024,1024,1024" indicates that clusters are enabled,
+                                                      but the dimensions cannot be determined at compile time. Any other value explicitly
+                                                      specifies the cluster dimensions.
+
+                                                      This is only relevant on targets with cluster support.
+
      ================================================ ==========================================================
 
 Calling Conventions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d158f0f58d711..dda8033f47398 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -107,6 +107,14 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
     return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
                       &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+  case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+    return std::tuple(nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
     return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
                       &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index e07d47381ecca..1064e57b9da9e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -111,18 +111,25 @@ struct AMDGPUFunctionArgInfo {
     DISPATCH_ID         =  4,
     FLAT_SCRATCH_INIT   =  5,
     LDS_KERNEL_ID       =  6, // LLVM internal, not part of the ABI
-    WORKGROUP_ID_X      = 10,
-    WORKGROUP_ID_Y      = 11,
-    WORKGROUP_ID_Z      = 12,
+    WORKGROUP_ID_X      = 10, // Also used for cluster ID X.
+    WORKGROUP_ID_Y      = 11, // Also used for cluster ID Y.
+    WORKGROUP_ID_Z      = 12, // Also used for cluster ID Z.
     PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
     IMPLICIT_BUFFER_PTR = 15,
     IMPLICIT_ARG_PTR = 16,
     PRIVATE_SEGMENT_SIZE = 17,
+    CLUSTER_WORKGROUP_ID_X = 21,
+    CLUSTER_WORKGROUP_ID_Y = 22,
+    CLUSTER_WORKGROUP_ID_Z = 23,
+    CLUSTER_WORKGROUP_MAX_ID_X = 24,
+    CLUSTER_WORKGROUP_MAX_ID_Y = 25,
+    CLUSTER_WORKGROUP_MAX_ID_Z = 26,
+    CLUSTER_WORKGROUP_MAX_FLAT_ID = 27,
 
     // VGPRS:
-    WORKITEM_ID_X       = 18,
-    WORKITEM_ID_Y       = 19,
-    WORKITEM_ID_Z       = 20,
+    WORKITEM_ID_X       = 28,
+    WORKITEM_ID_Y       = 29,
+    WORKITEM_ID_Z       = 30,
     FIRST_VGPR_VALUE    = WORKITEM_ID_X
   };
   // clang-format on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f18536cd4ab93..b5a41e3fbf8fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4452,6 +4452,74 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg,
   }
 }
 
+bool AMDGPULegalizerInfo::legalizeWorkGroupId(
+    MachineInstr &MI, MachineIRBuilder &B,
+    AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+    AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+    AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  if (!ST.hasClusters()) {
+    if (!loadInputValue(DstReg, B, WorkGroupIdPV))
+      return false;
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Clusters are supported. Return the global position in the grid. If clusters
+  // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID.
+
+  // WorkGroupIdXYZ = ClusterId == 0 ?
+  //   ClusterIdXYZ :
+  //   ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+  MachineRegisterInfo &MRI = *B.getMRI();
+  const LLT S32 = LLT::scalar(32);
+  Register ClusterIdXYZ = MRI.createGenericVirtualRegister(S32);
+  Register ClusterMaxIdXYZ = MRI.createGenericVirtualRegister(S32);
+  Register ClusterWorkGroupIdXYZ = MRI.createGenericVirtualRegister(S32);
+  if (!loadInputValue(ClusterIdXYZ, B, WorkGroupIdPV) ||
+      !loadInputValue(ClusterWorkGroupIdXYZ, B, ClusterWorkGroupIdPV) ||
+      !loadInputValue(ClusterMaxIdXYZ, B, ClusterMaxIdPV))
+    return false;
+
+  auto One = B.buildConstant(S32, 1);
+  auto ClusterSizeXYZ = B.buildAdd(S32, ClusterMaxIdXYZ, One);
+  auto GlobalIdXYZ = B.buildAdd(S32, ClusterWorkGroupIdXYZ,
+                                B.buildMul(S32, ClusterIdXYZ, ClusterSizeXYZ));
+
+  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+  switch (MFI->getClusterDims().getKind()) {
+  case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+  case AMDGPU::ClusterDimsAttr::Kind::VariableDims: {
+    B.buildCopy(DstReg, GlobalIdXYZ);
+    MI.eraseFromParent();
+    return true;
+  }
+  case AMDGPU::ClusterDimsAttr::Kind::NoCluster: {
+    B.buildCopy(DstReg, ClusterIdXYZ);
+    MI.eraseFromParent();
+    return true;
+  }
+  case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+    using namespace AMDGPU::Hwreg;
+    unsigned ClusterIdField = HwregEncoding::encode(ID_IB_STS2, 6, 4);
+    Register ClusterId = MRI.createGenericVirtualRegister(S32);
+    MRI.setRegClass(ClusterId, &AMDGPU::SReg_32RegClass);
+    B.buildInstr(AMDGPU::S_GETREG_B32_const)
+        .addDef(ClusterId)
+        .addImm(ClusterIdField);
+    auto Zero = B.buildConstant(S32, 0);
+    auto NoClusters =
+        B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), ClusterId, Zero);
+    B.buildSelect(DstReg, NoClusters, ClusterIdXYZ, GlobalIdXYZ);
+    MI.eraseFromParent();
+    return true;
+  }
+  }
+
+  llvm_unreachable("nothing should reach here");
+}
+
 bool AMDGPULegalizerInfo::loadInputValue(
     Register DstReg, MachineIRBuilder &B,
     AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
@@ -4471,8 +4539,31 @@ bool AMDGPULegalizerInfo::loadInputValue(
       AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
   const ArgDescriptor WorkGroupIDZ =
       ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+  const ArgDescriptor ClusterWorkGroupIDX =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+  const ArgDescriptor ClusterWorkGroupIDY =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+  const ArgDescriptor ClusterWorkGroupIDZ =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+  const ArgDescriptor ClusterWorkGroupMaxIDX =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+  const ArgDescriptor ClusterWorkGroupMaxIDY =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+  const ArgDescriptor ClusterWorkGroupMaxIDZ =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+  const ArgDescriptor ClusterWorkGroupMaxFlatID =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+  auto LoadConstant = [&](unsigned N) {
+    B.buildConstant(DstReg, N);
+    return true;
+  };
+
   if (ST.hasArchitectedSGPRs() &&
       (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+    AMDGPU::ClusterDimsAttr ClusterDims = MFI->getClusterDims();
+    bool HasFixedDims = ClusterDims.isFixedDims();
+
     switch (ArgType) {
     case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
       Arg = &WorkGroupIDX;
@@ -4489,6 +4580,53 @@ bool AMDGPULegalizerInfo::loadInputValue(
       ArgRC = &AMDGPU::SReg_32RegClass;
       ArgTy = LLT::scalar(32);
       break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+      if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+        return LoadConstant(0);
+      Arg = &ClusterWorkGroupIDX;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+      if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+        return LoadConstant(0);
+      Arg = &ClusterWorkGroupIDY;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+      if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+        return LoadConstant(0);
+      Arg = &ClusterWorkGroupIDZ;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[0] - 1);
+      Arg = &ClusterWorkGroupMaxIDX;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[1] - 1);
+      Arg = &ClusterWorkGroupMaxIDY;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[2] - 1);
+      Arg = &ClusterWorkGroupMaxIDZ;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+      Arg = &ClusterWorkGroupMaxFlatID;
+      ArgRC = &AMDGPU::SReg_32RegClass;
+      ArgTy = LLT::scalar(32);
+      break;
     default:
       break;
     }
@@ -4499,10 +4637,9 @@ bool AMDGPULegalizerInfo::loadInputValue(
 
   if (!Arg) {
     if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
-      // The intrinsic may appear when we have a 0 sized kernarg segment, in which
-      // case the pointer argument may be missing and we use null.
-      B.buildConstant(DstReg, 0);
-      return true;
+      // The intrinsic may appear when we have a 0 sized kernarg segment, in
+      // which case the pointer argument may be missing and we use null.
+      return LoadConstant(0);
     }
 
     // It's undefined behavior if a function marked with the amdgpu-no-*
@@ -7415,6 +7552,22 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI,
+                                                 MachineIRBuilder &B,
+                                                 AMDGPU::Hwreg::Id HwReg,
+                                                 unsigned LowBit,
+                                                 unsigned Width) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  Register DstReg = MI.getOperand(0).getReg();
+  if (!MRI.getRegClassOrNull(DstReg))
+    MRI.setRegClass(DstReg, &AMDGPU::SReg_32RegClass);
+  B.buildInstr(AMDGPU::S_GETREG_B32_const)
+      .addDef(DstReg)
+      .addImm(AMDGPU::Hwreg::HwregEncoding::encode(HwReg, LowBit, Width));
+  MI.eraseFromParent();
+  return true;
+}
+
 static constexpr unsigned FPEnvModeBitField =
     AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23);
 
@@ -7577,14 +7730,64 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2,
                                        AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
-    return legalizePreloadedArgIntrin(MI, MRI, B,
-                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+    return legalizeWorkGroupId(
+        MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
   case Intrinsic::amdgcn_workgroup_id_y:
-    return legalizePreloadedArgIntrin(MI, MRI, B,
-                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+    return legalizeWorkGroupId(
+        MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
   case Intrinsic::amdgcn_workgroup_id_z:
-    return legalizePreloadedArgIntrin(MI, MRI, B,
+    return legalizeWorkGroupId(
+        MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+        AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_cluster_id_x:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+  case Intrinsic::amdgcn_cluster_id_y:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+  case Intrinsic::amdgcn_cluster_id_z:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_cluster_workgroup_id_x:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
+  case Intrinsic::amdgcn_cluster_workgroup_id_y:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
+  case Intrinsic::amdgcn_cluster_workgroup_id_z:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+    return AMDGPU::isGFX1250(ST) &&
+           legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4);
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X);
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y);
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z);
+  case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+    return ST.hasGFX1250Insts() &&
+           legalizePreloadedArgIntrin(
+               MI, MRI, B,
+               AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID);
   case Intrinsic::amdgcn_wave_id:
     return legalizeWaveID(MI, B);
   case Intrinsic::amdgcn_lds_kernel_id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1f4e02b0d600a..cd44a9ba0807c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -114,6 +114,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
   void buildLoadInputValue(Register DstReg, MachineIRBuilder &B,
                            const ArgDescriptor *Arg,
                            const TargetRegisterClass *ArgRC, LLT ArgTy) const;
+  bool legalizeWorkGroupId(
+      MachineInstr &MI, MachineIRBuilder &B,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
   bool loadInputValue(Register DstReg, MachineIRBuilder &B,
                       AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
 
@@ -218,6 +223,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
 
   bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
   bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
+  bool legalizeConstHwRegRead(MachineInstr &MI, MachineIRBuilder &B,
+                              AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+                              unsigned Width) const;
 
   bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cb3e544449bbf..5c4538c0cc56e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2408,6 +2408,53 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG,
   return ArgValue;
 }
 
+SDValue SITargetLowering::lowerWorkGroupId(
+    SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+    AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV,
+    AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+    AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const {
+  if (!Subtarget->hasClusters())
+    return getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+
+  // Clusters are supported. Return the global position in the grid. If clusters
+  // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID.
+
+  // WorkGroupIdXYZ = ClusterId == 0 ?
+  //   ClusterIdXYZ :
+  //   ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ
+  SDValue ClusterIdXYZ = getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV);
+  SDLoc SL(ClusterIdXYZ);
+  SDValue ClusterMaxIdXYZ = getPreloadedValue(DAG, MFI, VT, ClusterMaxIdPV);
+  SDValue One = DAG.getConstant(1, SL, VT);
+  SDValue ClusterSizeXYZ = DAG.getNode(ISD::ADD, SL, VT, ClusterMaxIdXYZ, One);
+  SDValue ClusterWorkGroupIdXYZ =
+      getPreloadedValue(DAG, MFI, VT, ClusterWorkGroupIdPV);
+  SDValue GlobalIdXYZ =
+      DAG.getNode(ISD::ADD, SL, VT, ClusterWorkGroupIdXYZ,
+                  DAG.getNode(ISD::MUL, SL, VT, ClusterIdXYZ, ClusterSizeXYZ));
+
+  switch (MFI.getClusterDims().getKind()) {
+  case AMDGPU::ClusterDimsAttr::Kind::FixedDims:
+  case AMDGPU::ClusterDimsAttr::Kind::VariableDims:
+    return GlobalIdXYZ;
+  case AMDGPU::ClusterDimsAttr::Kind::NoCluster:
+    return ClusterIdXYZ;
+  case AMDGPU::ClusterDimsAttr::Kind::Unknown: {
+    using namespace AMDGPU::Hwreg;
+    SDValue ClusterIdField =
+        DAG.getTargetConstant(HwregEncoding::encode(ID_IB_STS2, 6, 4), SL, VT);
+    SDNode *GetReg =
+        DAG.getMachineNode(AMDGPU::S_GETREG_B32_const, SL, VT, ClusterIdField);
+    SDValue ClusterId(GetReg, 0);
+    SDValue Zero = DAG.getConstant(0, SL, VT);
+    return DAG.getNode(ISD::SELECT_CC, SL, VT, ClusterId, Zero, ClusterIdXYZ,
+                       GlobalIdXYZ, DAG.getCondCode(ISD::SETEQ));
+  }
+  }
+
+  llvm_unreachable("nothing should reach here");
+}
+
 SDValue SITargetLowering::getPreloadedValue(
     SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
     AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
@@ -2426,9 +2473,30 @@ SDValue SITargetLowering::getPreloadedValue(
       AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
   const ArgDescriptor WorkGroupIDZ =
       ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+  const ArgDescriptor ClusterWorkGroupIDX =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu);
+  const ArgDescriptor ClusterWorkGroupIDY =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u);
+  const ArgDescriptor ClusterWorkGroupIDZ =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u);
+  const ArgDescriptor ClusterWorkGroupMaxIDX =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u);
+  const ArgDescriptor ClusterWorkGroupMaxIDY =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u);
+  const ArgDescriptor ClusterWorkGroupMaxIDZ =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u);
+  const ArgDescriptor ClusterWorkGroupMaxFlatID =
+      ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u);
+
+  auto LoadConstant = [&](unsigned N) {
+    return DAG.getConstant(N, SDLoc(), VT);
+  };
+
   if (Subtarget->hasArchitectedSGPRs() &&
-      (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx ||
-       CC == CallingConv::AMDGPU_Gfx_WholeWave)) {
+      (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
+    AMDGPU::ClusterDimsAttr ClusterDims = MFI.getClusterDims();
+    bool HasFixedDims = ClusterDims.isFixedDims();
+
     switch (PVID) {
     case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
       Reg = &WorkGroupIDX;
@@ -2445,6 +2513,53 @@ SDValue SITargetLowering::getPreloadedValue(
       RC = &AMDGPU::SReg_32RegClass;
       Ty = LLT::scalar(32);
       break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X:
+      if (HasFixedDims && ClusterDims.getDims()[0] == 1)
+        return LoadConstant(0);
+      Reg = &ClusterWorkGroupIDX;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y:
+      if (HasFixedDims && ClusterDims.getDims()[1] == 1)
+        return LoadConstant(0);
+      Reg = &ClusterWorkGroupIDY;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z:
+      if (HasFixedDims && ClusterDims.getDims()[2] == 1)
+        return LoadConstant(0);
+      Reg = &ClusterWorkGroupIDZ;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[0] - 1);
+      Reg = &ClusterWorkGroupMaxIDX;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[1] - 1);
+      Reg = &ClusterWorkGroupMaxIDY;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z:
+      if (HasFixedDims)
+        return LoadConstant(ClusterDims.getDims()[2] - 1);
+      Reg = &ClusterWorkGroupMaxIDZ;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
+    case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID:
+      Reg = &ClusterWorkGroupMaxFlatID;
+      RC = &AMDGPU::SReg_32RegClass;
+      Ty = LLT::scalar(32);
+      break;
     default:
       break;
     }
@@ -9528,6 +9643,19 @@ SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
                      DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
 }
 
+SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+                                              AMDGPU::Hwreg::Id HwReg,
+                                              unsigned LowBit,
+                                              unsigned Width) const {
+  SDLoc SL(Op);
+  using namespace AMDGPU::Hwreg;
+  return {DAG.getMachineNode(
+              AMDGPU::S_GETREG_B32_const, SL, MVT::i32,
+              DAG.getTargetConstant(HwregEncoding::encode(HwReg, LowBit, Width),
+                                    SL, MVT::i32)),
+          0};
+}
+
 SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
                                           unsigned Dim,
                                           const ArgDescriptor &Arg) const {
@@ -9674,14 +9802,81 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return lowerImplicitZextParam(DAG, Op, MVT::i16,
                                   SI::KernelInputOffsets::LOCAL_SIZE_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
-    return getPreloadedValue(DAG, *MFI, VT,
-                             AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+    return lowerWorkGroupId(DAG, *MFI, VT,
+                            AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
   case Intrinsic::amdgcn_workgroup_id_y:
-    return getPreloadedValue(DAG, *MFI, VT,
-                             AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+    return lowerWorkGroupId(DAG, *MFI, VT,
+                            AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
   case Intrinsic::amdgcn_workgroup_id_z:
-    return getPreloadedValue(DAG, *MFI, VT,
-                             AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+    return lowerWorkGroupId(DAG, *MFI, VT,
+                            AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
+                            AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_cluster_id_x:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(DAG, *MFI, VT,
+                                   AMDGPUFunctionArgInfo::WORKGROUP_ID_X)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_id_y:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(DAG, *MFI, VT,
+                                   AMDGPUFunctionArgInfo::WORKGROUP_ID_Y)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_id_z:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(DAG, *MFI, VT,
+                                   AMDGPUFunctionArgInfo::WORKGROUP_ID_Z)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_id_x:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_id_y:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_id_z:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_flat_id:
+    return AMDGPU::isGFX1250(*Subtarget)
+               ? lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4)
+               : SDValue();
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z)
+               : DAG.getUNDEF(VT);
+  case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
+    return Subtarget->hasGFX1250Insts()
+               ? getPreloadedValue(
+                     DAG, *MFI, VT,
+                     AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID)
+               : DAG.getUNDEF(VT);
   case Intrinsic::amdgcn_wave_id:
     return lowerWaveID(DAG, Op);
   case Intrinsic::amdgcn_lds_kernel_id: {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4886fcf9fd012..48f729c260c69 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
 
 #include "AMDGPUArgumentUsageInfo.h"
 #include "AMDGPUISelLowering.h"
+#include "SIDefines.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
@@ -61,6 +62,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
                               const SDLoc &SL, SDValue Chain,
                               const ISD::InputArg &Arg) const;
+  SDValue lowerWorkGroupId(
+      SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
+      AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
   SDValue getPreloadedValue(SelectionDAG &DAG,
                             const SIMachineFunctionInfo &MFI,
                             EVT VT,
@@ -81,6 +87,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                         unsigned NewOpcode) const;
 
   SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
+  SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
+                              AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
+                              unsigned Width) const;
   SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
                           const ArgDescriptor &ArgDesc) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f7dde2b90b68e..a80bd81c7ea11 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -926,7 +926,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
            Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
            Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
-           Opcode == AMDGPU::S_GETREG_B32;
+           Opcode == AMDGPU::S_GETREG_B32 ||
+           Opcode == AMDGPU::S_GETREG_B32_const;
   }
 
   /// \returns true if this is an s_store_dword* instruction. This is more
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 54426d33d3473..1f11be475e9f8 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -195,6 +195,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
     VGPRForAGPRCopy =
         AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
   }
+
+  ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
 }
 
 MachineFunctionInfo *SIMachineFunctionInfo::clone(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ca8f8033a2d54..45606153db58e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // Default/requested number of work groups for the function.
   SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};
 
+  // Requested cluster dimensions.
+  AMDGPU::ClusterDimsAttr ClusterDims;
+
 private:
   unsigned NumUserSGPRs = 0;
   unsigned NumSystemSGPRs = 0;
@@ -1207,6 +1210,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
   unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
   unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
+
+  AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index fe94887cdff98..296ce5a46287c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1127,19 +1127,26 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
   "$sdst, $simm16"
 >;
 
-// This is hasSideEffects to allow its use in readcyclecounter selection.
 // FIXME: Need to truncate immediate to 16-bits.
-// FIXME: Should have separate pseudos for known may read MODE and
-// only read MODE.
-def S_GETREG_B32 : SOPK_Pseudo <
+class S_GETREG_B32_Pseudo<list<dag> pattern=[]> : SOPK_Pseudo <
   "s_getreg_b32",
   (outs SReg_32:$sdst), (ins hwreg:$simm16),
-  "$sdst, $simm16",
-  [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
+  "$sdst, $simm16", pattern>;
+
+// This is hasSideEffects to allow its use in readcyclecounter selection.
+// FIXME: Should have separate pseudos for known may read MODE and
+// only read MODE.
+def S_GETREG_B32 : S_GETREG_B32_Pseudo<
+    [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
   let hasSideEffects = 1;
   let Uses = [MODE];
 }
 
+// A version of the pseudo for reading hardware register fields that are
+// known to remain the same during the course of the run. Has no side
+// effects and doesn't read MODE.
+def S_GETREG_B32_const : S_GETREG_B32_Pseudo;
+
 let Defs = [MODE], Uses = [MODE] in {
 
 // FIXME: Need to truncate immediate to 16-bits.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 40da4f96aefdb..faae1fee342af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3533,6 +3533,54 @@ bool isPackedFP32Inst(unsigned Opc) {
   }
 }
 
+/// \returns the fixed cluster dimensions; asserts that the kind is FixedDims.
+const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
+  assert(isFixedDims() && "expect kind to be FixedDims");
+  return Dims;
+}
+
+/// \returns "x,y,z" text for the stored kind, or an empty string for Unknown.
+std::string ClusterDimsAttr::to_string() const {
+  // Formats three values as a comma-separated list.
+  auto FormatTriple = [](unsigned X, unsigned Y, unsigned Z) {
+    SmallString<10> Text;
+    raw_svector_ostream Stream(Text);
+    Stream << X << ',' << Y << ',' << Z;
+    return Stream.str().str();
+  };
+  switch (getKind()) {
+  case Kind::Unknown:
+    return "";
+  case Kind::NoCluster:
+    return FormatTriple(EncoNoCluster, EncoNoCluster, EncoNoCluster);
+  case Kind::VariableDims:
+    return FormatTriple(EncoVariableDims, EncoVariableDims, EncoVariableDims);
+  case Kind::FixedDims:
+    return FormatTriple(Dims[0], Dims[1], Dims[2]);
+  }
+  llvm_unreachable("Unknown ClusterDimsAttr kind");
+}
+
+/// Reads "amdgpu-cluster-dims" from \p F and classifies its three values.
+ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
+  std::optional<SmallVector<unsigned>> Parsed =
+      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
+  if (!Parsed)
+    return ClusterDimsAttr(Kind::Unknown);
+  const SmallVector<unsigned> &Vals = *Parsed;
+  auto AllEqual = [&Vals](unsigned Enc) {
+    return all_of(Vals, [Enc](unsigned V) { return V == Enc; });
+  };
+  if (AllEqual(EncoNoCluster))
+    return ClusterDimsAttr(Kind::NoCluster);
+  if (AllEqual(EncoVariableDims))
+    return ClusterDimsAttr(Kind::VariableDims);
+
+  ClusterDimsAttr Result(Kind::FixedDims);
+  Result.Dims = {Vals[0], Vals[1], Vals[2]};
+  return Result;
+}
+
 } // namespace AMDGPU
 
 raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3fcd16f9290b1..3f8d43db5a48c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1813,6 +1813,50 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
 /// must be defined in terms of bytes.
 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
 
+/// Parsed "amdgpu-cluster-dims" attribute: Unknown if absent, NoCluster if all
+/// values are 0, VariableDims if all are 1024, FixedDims otherwise.
+class ClusterDimsAttr {
+public:
+  enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
+  ClusterDimsAttr() = default;
+
+  Kind getKind() const { return AttrKind; }
+  bool isUnknown() const { return AttrKind == Kind::Unknown; }
+  bool isNoCluster() const { return AttrKind == Kind::NoCluster; }
+  bool isFixedDims() const { return AttrKind == Kind::FixedDims; }
+  bool isVariableDims() const { return AttrKind == Kind::VariableDims; }
+  /// Misspelled alias of isVariableDims(), kept so existing callers build.
+  bool isVariableedDims() const { return isVariableDims(); }
+
+  /// Each setter replaces the whole object, so the stored dims reset to zero.
+  void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }
+  void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }
+  void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }
+
+  /// \returns the dims stored. Note that this function can only be called if
+  /// the kind is Kind::FixedDims.
+  const std::array<unsigned, 3> &getDims() const;
+
+  bool operator==(const ClusterDimsAttr &RHS) const {
+    return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
+  }
+
+  /// \returns the attribute as "x,y,z" text; empty for Kind::Unknown.
+  std::string to_string() const;
+
+  /// Builds the attribute from the "amdgpu-cluster-dims" attribute of \p F.
+  static ClusterDimsAttr get(const Function &F);
+
+private:
+  /// Attribute values that select a non-fixed kind when used for all dims.
+  enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
+
+  explicit ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
+
+  std::array<unsigned, 3> Dims = {0, 0, 0};
+  Kind AttrKind = Kind::Unknown;
+};
+
 } // end namespace AMDGPU
 
 raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
new file mode 100644
index 0000000000000..aa3b7b3606fd8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
@@ -0,0 +1,1258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.id.z() #0
+
+define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_and_b32 s2, ttmp6, 15
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_and_b32 s2, ttmp6, 15
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_and_b32 s2, ttmp6, 15
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_and_b32 s2, ttmp6, 15
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1,2,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_x_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_y_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) {
+; CHECK-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_flat_id:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_flat_id:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4)
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.flat.id()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,2,1" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_id_z_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 1
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z() ; "amdgpu-cluster-dims"="2,2,1" is set on this kernel; all four prefixes expect the id to fold to constant 0 (v_mov_b32 v0, 0), and the checks contain no instruction that extracts the id at run time.
+  store i32 %id, ptr addrspace(1) %out ; matched by the global_store_b32 CHECK lines above
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
new file mode 100644
index 0000000000000..afe37e371fbc3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() #0
+
+define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_flat_id:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40018
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() ; No "amdgpu-cluster-dims" string attribute on this kernel; all four prefixes expect a single s_bfe_u32 on ttmp6 with operand 0x40018. NOTE(review): 0x40018 presumably selects a 4-bit field at bit 24 -- confirm against the s_bfe_u32 operand encoding.
+  store i32 %id, ptr addrspace(1) %out ; matched by the global_store_b32 CHECK lines above
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
new file mode 100644
index 0000000000000..7ea4fa5373e57
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
@@ -0,0 +1,1077 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s
+; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s
+
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.x() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.y() #0
+declare i32 @llvm.amdgcn.cluster.workgroup.max.id.z() #0
+
+define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() ; No "amdgpu-cluster-dims" string attribute on this kernel; all four prefixes expect a single s_bfe_u32 on ttmp6 with operand 0x4000c. NOTE(review): 0x4000c presumably selects a 4-bit field at bit 12 -- confirm against the s_bfe_u32 operand encoding.
+  store i32 %id, ptr addrspace(1) %out ; matched by the global_store_b32 CHECK lines above
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() ; this kernel fixes "amdgpu-cluster-dims" to "5,6,7": all four check groups above expect the literal 4 (one less than the first dim, 5) and no ttmp6 read
+  store i32 %id, ptr addrspace(1) %out ; the check groups see this store as the global_store_b32 of that literal
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40010
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() ; all four check groups above expect a runtime read here: s_bfe_u32 on ttmp6 with operand 0x40010 (presumably a 4-bit field at bit 16 - confirm against the ISA docs)
+  store i32 %id, ptr addrspace(1) %out ; the check groups see this store as the global_store_b32 of the extracted value; NOTE(review): attribute group #1 is declared outside this excerpt - confirm what it sets
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() ; this kernel fixes "amdgpu-cluster-dims" to "5,6,7": all four check groups above expect the literal 5 (one less than the second dim, 6) and no ttmp6 read
+  store i32 %id, ptr addrspace(1) %out ; the check groups see this store as the global_store_b32 of that literal
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-UNKNOWN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    s_bfe_u32 s2, ttmp6, 0x40014
+; CHECK-G-MESA3D-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" {
+; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-UNKNOWN:       ; %bb.0:
+; CHECK-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-UNKNOWN-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-MESA3D:         .amd_kernel_code_t
+; CHECK-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     priority = 0
+; CHECK-MESA3D-NEXT:     float_mode = 240
+; CHECK-MESA3D-NEXT:     priv = 0
+; CHECK-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-MESA3D-NEXT:     debug_mode = 0
+; CHECK-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-MESA3D-NEXT:     enable_exception = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-MESA3D-NEXT:     private_element_size = 1
+; CHECK-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-MESA3D-NEXT:     call_convention = -1
+; CHECK-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-MESA3D-NEXT:  ; %bb.0:
+; CHECK-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-MESA3D-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6
+; CHECK-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-MESA3D-NEXT:    global_store_b32 v0, v1, s[0:1]
+; CHECK-MESA3D-NEXT:    s_endpgm
+;
+; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-UNKNOWN:       ; %bb.0:
+; CHECK-G-UNKNOWN-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; CHECK-G-UNKNOWN-NEXT:    v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-UNKNOWN-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-UNKNOWN-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-UNKNOWN-NEXT:    s_endpgm
+;
+; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z_optimized:
+; CHECK-G-MESA3D:         .amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:     amd_code_version_major = 1
+; CHECK-G-MESA3D-NEXT:     amd_code_version_minor = 2
+; CHECK-G-MESA3D-NEXT:     amd_machine_kind = 1
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_major = 12
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_minor = 5
+; CHECK-G-MESA3D-NEXT:     amd_machine_version_stepping = 0
+; CHECK-G-MESA3D-NEXT:     kernel_code_entry_byte_offset = 256
+; CHECK-G-MESA3D-NEXT:     kernel_code_prefetch_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     granulated_workitem_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     granulated_wavefront_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     priority = 0
+; CHECK-G-MESA3D-NEXT:     float_mode = 240
+; CHECK-G-MESA3D-NEXT:     priv = 0
+; CHECK-G-MESA3D-NEXT:     enable_dx10_clamp = 0
+; CHECK-G-MESA3D-NEXT:     debug_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_ieee_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_wgp_mode = 0
+; CHECK-G-MESA3D-NEXT:     enable_mem_ordered = 1
+; CHECK-G-MESA3D-NEXT:     enable_fwd_progress = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; CHECK-G-MESA3D-NEXT:     user_sgpr_count = 8
+; CHECK-G-MESA3D-NEXT:     enable_trap_handler = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_x = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_y = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_id_z = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_workgroup_info = 0
+; CHECK-G-MESA3D-NEXT:     enable_vgpr_workitem_id = 2
+; CHECK-G-MESA3D-NEXT:     enable_exception_msb = 0
+; CHECK-G-MESA3D-NEXT:     granulated_lds_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_exception = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_buffer = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_queue_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_dispatch_id = 1
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_flat_scratch_init = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_private_segment_size = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; CHECK-G-MESA3D-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; CHECK-G-MESA3D-NEXT:     enable_wavefront_size32 = 1
+; CHECK-G-MESA3D-NEXT:     enable_ordered_append_gds = 0
+; CHECK-G-MESA3D-NEXT:     private_element_size = 1
+; CHECK-G-MESA3D-NEXT:     is_ptr64 = 1
+; CHECK-G-MESA3D-NEXT:     is_dynamic_callstack = 0
+; CHECK-G-MESA3D-NEXT:     is_debug_enabled = 0
+; CHECK-G-MESA3D-NEXT:     is_xnack_enabled = 0
+; CHECK-G-MESA3D-NEXT:     workitem_private_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     workgroup_group_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     gds_segment_byte_size = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_byte_size = 24
+; CHECK-G-MESA3D-NEXT:     workgroup_fbarrier_count = 0
+; CHECK-G-MESA3D-NEXT:     wavefront_sgpr_count = 6
+; CHECK-G-MESA3D-NEXT:     workitem_vgpr_count = 2
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_vgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_first = 0
+; CHECK-G-MESA3D-NEXT:     reserved_sgpr_count = 0
+; CHECK-G-MESA3D-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     debug_private_segment_buffer_sgpr = 0
+; CHECK-G-MESA3D-NEXT:     kernarg_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     group_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     private_segment_alignment = 4
+; CHECK-G-MESA3D-NEXT:     wavefront_size = 5
+; CHECK-G-MESA3D-NEXT:     call_convention = -1
+; CHECK-G-MESA3D-NEXT:     runtime_loader_kernel_symbol = 0
+; CHECK-G-MESA3D-NEXT:    .end_amd_kernel_code_t
+; CHECK-G-MESA3D-NEXT:  ; %bb.0:
+; CHECK-G-MESA3D-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CHECK-G-MESA3D-NEXT:    v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0
+; CHECK-G-MESA3D-NEXT:    s_wait_kmcnt 0x0
+; CHECK-G-MESA3D-NEXT:    global_store_b32 v1, v0, s[0:1]
+; CHECK-G-MESA3D-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 2554d99def57f..169a84ff1f86b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -297,6 +297,6 @@ declare i32 @llvm.amdgcn.workgroup.id.y()
 declare i32 @llvm.amdgcn.workgroup.id.z()
 declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
 
-attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
+attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX9ARCH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
new file mode 100644
index 0000000000000..69439d49e588f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s0, ttmp9, s1
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, ttmp9
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, ttmp9
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.x()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.y()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s3, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s0, s1, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, s1, s0
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s2, s2, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s3, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s0, s1, s2
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, s1
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_mul_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s2, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
+; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 15
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_and_b32 s0, s0, 0x1fffe
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s1, s0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-SDAG-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s1, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_lshl1_add_u32 s0, s0, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-GISEL-NEXT:    s_set_pc_i64 s[30:31]
+  %id = call i32 @llvm.amdgcn.workgroup.id.z()
+  store i32 %id, ptr addrspace(1) %out
+  ret void
+}
+
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
new file mode 100644
index 0000000000000..497241cff392d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs < %s | FileCheck -check-prefix=GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel < %s | FileCheck -check-prefix=GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel < %s | FileCheck -check-prefix=GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+define amdgpu_cs void @_amdgpu_cs_main() {
+; GFX9-SDAG-LABEL: _amdgpu_cs_main:
+; GFX9-SDAG:       ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: _amdgpu_cs_main:
+; GFX9-GISEL:       ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX12-SDAG-LABEL: _amdgpu_cs_main:
+; GFX12-SDAG:       ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GISEL-LABEL: _amdgpu_cs_main:
+; GFX12-GISEL:       ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: _amdgpu_cs_main:
+; GFX1250-SDAG:       ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s2, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT:    s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s4, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, s3, s2
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s5, ttmp7, 16
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s4, s4, 1
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, s0
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, s5, s4
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s4, s4, s0
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s0, s5, s4
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s1, ttmp9, s1
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s2, s3, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: _amdgpu_cs_main:
+; GFX1250-GISEL:       ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s0, ttmp9, s1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s1, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT:    s_and_b32 s3, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s1, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s1, s3, s1
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s4, s4, s1
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s1, s3, s4
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s3, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s4, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s3, s3, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s5, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_mul_i32 s3, s4, s3
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s5, s5, s3
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s2, s4, s5
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT:    s_endpgm
+.entry:
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+  %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0 ; poison base is fine: all three lanes are overwritten before the store
+  %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+  %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_cs void @workgroup_id_no_clusters() "amdgpu-cluster-dims"="0,0,0" {
+; GFX9-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX9-SDAG:       ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX9-GISEL:       ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX12-SDAG:       ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX12-GISEL:       ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_no_clusters:
+; GFX1250-SDAG:       ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX1250-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_no_clusters:
+; GFX1250-GISEL:       ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT:    s_endpgm
+.entry:
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+  %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0 ; poison base is fine: all three lanes are overwritten before the store
+  %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+  %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" {
+; GFX9-SDAG-LABEL: workgroup_id_optimized:
+; GFX9-SDAG:       ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT:    s_lshr_b32 s0, ttmp7, 16
+; GFX9-SDAG-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX9-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: workgroup_id_optimized:
+; GFX9-GISEL:       ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX12-SDAG-LABEL: workgroup_id_optimized:
+; GFX12-SDAG:       ; %bb.0: ; %.entry
+; GFX12-SDAG-NEXT:    s_and_b32 s0, ttmp7, 0xffff
+; GFX12-SDAG-NEXT:    s_lshr_b32 s1, ttmp7, 16
+; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s1
+; GFX12-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GISEL-LABEL: workgroup_id_optimized:
+; GFX12-GISEL:       ; %bb.0: ; %.entry
+; GFX12-GISEL-NEXT:    s_mov_b32 s0, ttmp9
+; GFX12-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX12-GISEL-NEXT:    s_lshr_b32 s2, ttmp7, 16
+; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX12-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX12-GISEL-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_optimized:
+; GFX1250-SDAG:       ; %bb.0: ; %.entry
+; GFX1250-SDAG-NEXT:    s_lshl_b32 s0, ttmp9, 1
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s2, ttmp7, 14
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_and_b32 s0, s2, 0x3fffc
+; GFX1250-SDAG-NEXT:    s_and_b32 s2, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s3, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_mul_i32 s2, s2, 3
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s4, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s3, s3, s0
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s4, s4, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s3
+; GFX1250-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_optimized:
+; GFX1250-GISEL:       ; %bb.0: ; %.entry
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_and_b32 s0, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s1, s1, 3
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s3, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s4, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_lshl1_add_u32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s2, s1
+; GFX1250-GISEL-NEXT:    s_lshl2_add_u32 s2, s3, s4
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1250-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], null
+; GFX1250-GISEL-NEXT:    s_endpgm
+.entry:
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+  %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0 ; poison base is fine: all three lanes are overwritten before the store
+  %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+  %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_cs void @caller() {
+; GFX9-SDAG-LABEL: caller:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_getpc_b64 s[8:9]
+; GFX9-SDAG-NEXT:    s_mov_b32 s8, s0
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-SDAG-NEXT:    s_mov_b32 s5, callee at abs32@hi
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, callee at abs32@lo
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT:    s_mov_b32 s32, 0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_add_u32 s8, s8, s0
+; GFX9-SDAG-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GFX9-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GFX9-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: caller:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GFX9-GISEL-NEXT:    s_mov_b32 s8, s0
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x10
+; GFX9-GISEL-NEXT:    s_mov_b32 s4, callee at abs32@lo
+; GFX9-GISEL-NEXT:    s_mov_b32 s5, callee at abs32@hi
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX9-GISEL-NEXT:    s_mov_b32 s32, 0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_add_u32 s8, s8, s0
+; GFX9-GISEL-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-GISEL-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GFX9-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX12-SDAG-LABEL: caller:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX12-SDAG-NEXT:    s_mov_b32 s1, callee at abs32@hi
+; GFX12-SDAG-NEXT:    s_mov_b32 s0, callee at abs32@lo
+; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
+; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GFX12-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GISEL-LABEL: caller:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, ttmp9
+; GFX12-GISEL-NEXT:    s_mov_b32 s0, callee at abs32@lo
+; GFX12-GISEL-NEXT:    s_mov_b32 s1, callee at abs32@hi
+; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
+; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GFX12-GISEL-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: caller:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-SDAG-NEXT:    s_mov_b32 s32, 0
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s2, ttmp9, s1
+; GFX1250-SDAG-NEXT:    s_mov_b64 s[0:1], callee at abs64
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-NEXT:    s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: caller:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    s_mov_b32 s32, 0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s2, ttmp9, s1
+; GFX1250-GISEL-NEXT:    s_mov_b64 s[0:1], callee at abs64
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT:    s_swap_pc_i64 s[30:31], s[0:1]
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  call amdgpu_gfx void @callee(i32 %idx) ; the x workgroup id is the only argument; every check prefix expects it in v0 before the call
+  ret void
+}
+
+declare amdgpu_gfx void @callee(i32)
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 25609e881254e..b2bcb74e4184f 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -4089,32 +4089,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %
 ; GFX1250-NEXT:    s_add_co_i32 s0, s10, 1
 ; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1250-NEXT:    v_mul_lo_u32 v1, s0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1250-NEXT:    v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1
 ; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_and_b32 s4, ttmp6, 15
+; GFX1250-NEXT:    s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4)
 ; GFX1250-NEXT:    v_mul_lo_u32 v2, v2, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1250-NEXT:    v_mul_lo_u32 v3, v2, v1
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    s_load_b32 s2, s[2:3], 0x4
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_bfe_u32 s3, ttmp6, 0x4000c
 ; GFX1250-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX1250-NEXT:    s_add_co_i32 s3, s3, 1
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    s_mul_i32 s3, ttmp9, s3
 ; GFX1250-NEXT:    v_add_nc_u32_e32 v1, v3, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    s_add_co_i32 s4, s4, s3
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1250-NEXT:    v_mul_lo_u32 v1, v1, v2
 ; GFX1250-NEXT:    v_add_nc_u32_e32 v2, 1, v3
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX1250-NEXT:    s_cmp_eq_u32 s5, 0
 ; GFX1250-NEXT:    v_mul_lo_u32 v3, v1, v2
-; GFX1250-NEXT:    v_mad_u32 v0, ttmp9, s2, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    s_cselect_b32 s3, ttmp9, s4
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT:    v_mad_u32 v0, s3, s2, v0
 ; GFX1250-NEXT:    v_add_nc_u32_e32 v2, v3, v2
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1250-NEXT:    v_mul_lo_u32 v2, v2, v1
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX1250-NEXT:    v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX1250-NEXT:    v_mad_u32 v3, v2, v3, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX1250-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1250-NEXT:    v_mad_u32 v2, v3, v2, v3
 ; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX1250-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index 7a64e55abb8d3..afca83a7e1c36 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
 
 define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
 ;
@@ -15,6 +17,50 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
 ; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
+; GFX1200-LABEL: workgroup_id_x:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1200-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT:    s_wait_kmcnt 0x0
+; GFX1200-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_x:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_and_b32 s3, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_mul_i32 s2, ttmp9, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s3, s3, s2
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s4, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s2, ttmp9, s3
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_x:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s2, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s3, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s2, s2, 1
+; GFX1250-GISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s2, ttmp9, s2
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s3, s3, s2
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s4, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s2, ttmp9, s3
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT:    s_endpgm
 ; GFX12-LABEL: workgroup_id_x:
 ; GFX12:       ; %bb.0:
 ; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
@@ -41,6 +87,74 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
 ; GFX9-NEXT:    global_store_dword v1, v2, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
+; GFX1200-LABEL: workgroup_id_xy:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT:    s_and_b32 s4, ttmp7, 0xffff
+; GFX1200-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1200-NEXT:    s_wait_kmcnt 0x0
+; GFX1200-NEXT:    s_clause 0x1
+; GFX1200-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT:    global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xy:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s6, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_and_b32 s4, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s6, s6, 1
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s7, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_mul_i32 s5, s4, s6
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s6, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s7, s7, 1
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s6, s6, s5
+; GFX1250-SDAG-NEXT:    s_and_b32 s5, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_mul_i32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s5, s5, s7
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s8, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s5, ttmp9, s5
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s4, s4, s6
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s5
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xy:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s6, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s6, s6, 1
+; GFX1250-GISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT:    s_and_b32 s4, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_mul_i32 s5, ttmp9, s6
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s4, s4, s5
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s4, ttmp9, s4
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s5, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT:    s_and_b32 s7, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s5, s5, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s8, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s5, s7, s5
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s8, s8, s5
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s4, s7, s8
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_clause 0x1
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT:    s_endpgm
 ; GFX12-LABEL: workgroup_id_xy:
 ; GFX12:       ; %bb.0:
 ; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
@@ -77,6 +191,99 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
 ; GFX9-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX9-NEXT:    s_endpgm
 ;
+; GFX1200-LABEL: workgroup_id_xyz:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    s_clause 0x1
+; GFX1200-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1200-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1200-NEXT:    s_and_b32 s6, ttmp7, 0xffff
+; GFX1200-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
+; GFX1200-NEXT:    s_lshr_b32 s7, ttmp7, 16
+; GFX1200-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1200-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GFX1200-NEXT:    s_wait_kmcnt 0x0
+; GFX1200-NEXT:    s_clause 0x2
+; GFX1200-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1200-NEXT:    global_store_b32 v1, v2, s[2:3]
+; GFX1200-NEXT:    global_store_b32 v1, v3, s[4:5]
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: workgroup_id_xyz:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s0, ttmp6, 0x40014
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s6, ttmp7, 16
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s7, s0, 1
+; GFX1250-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-SDAG-NEXT:    s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s9, ttmp6, 0x40010
+; GFX1250-SDAG-NEXT:    s_mul_i32 s7, s6, s7
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s8, ttmp6, 0x40008
+; GFX1250-SDAG-NEXT:    s_and_b32 s10, ttmp7, 0xffff
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s9, s9, 1
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s11, ttmp6, 0x4000c
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s8, s8, s7
+; GFX1250-SDAG-NEXT:    s_mul_i32 s7, s10, s9
+; GFX1250-SDAG-NEXT:    s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s11, s11, 1
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s9, s9, s7
+; GFX1250-SDAG-NEXT:    s_and_b32 s7, ttmp6, 15
+; GFX1250-SDAG-NEXT:    s_mul_i32 s11, ttmp9, s11
+; GFX1250-SDAG-NEXT:    s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-SDAG-NEXT:    s_add_co_i32 s7, s7, s11
+; GFX1250-SDAG-NEXT:    s_cmp_eq_u32 s12, 0
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s7, ttmp9, s7
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s7, s10, s9
+; GFX1250-SDAG-NEXT:    s_cselect_b32 s6, s6, s8
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_clause 0x2
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v2, s[2:3]
+; GFX1250-SDAG-NEXT:    global_store_b32 v0, v3, s[4:5]
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: workgroup_id_xyz:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x4000c
+; GFX1250-GISEL-NEXT:    s_and_b32 s1, ttmp6, 15
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4)
+; GFX1250-GISEL-NEXT:    s_mul_i32 s0, ttmp9, s0
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s1, s1, s0
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s7, ttmp9, s1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s0, ttmp6, 0x40010
+; GFX1250-GISEL-NEXT:    s_and_b32 s8, ttmp7, 0xffff
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s9, ttmp6, 0x40004
+; GFX1250-GISEL-NEXT:    s_mul_i32 s10, s8, s0
+; GFX1250-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-GISEL-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s9, s9, s10
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v0, s7
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s8, s8, s9
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s9, ttmp6, 0x40014
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s10, ttmp7, 16
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s9, s9, 1
+; GFX1250-GISEL-NEXT:    s_bfe_u32 s11, ttmp6, 0x40008
+; GFX1250-GISEL-NEXT:    s_mul_i32 s9, s10, s9
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_add_co_i32 s11, s11, s9
+; GFX1250-GISEL-NEXT:    s_cmp_eq_u32 s6, 0
+; GFX1250-GISEL-NEXT:    s_cselect_b32 s6, s10, s11
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_clause 0x2
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v2, s[2:3]
+; GFX1250-GISEL-NEXT:    global_store_b32 v1, v3, s[4:5]
+; GFX1250-GISEL-NEXT:    s_endpgm
 ; GFX12-LABEL: workgroup_id_xyz:
 ; GFX12:       ; %bb.0:
 ; GFX12-NEXT:    s_clause 0x1
@@ -107,7 +314,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
 declare i32 @llvm.amdgcn.workgroup.id.y()
 declare i32 @llvm.amdgcn.workgroup.id.z()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
+; GFX1250: {{.*}}
 ; GFX9-GISEL: {{.*}}
 ; GFX9-SDAG: {{.*}}

>From 6c2e11083fdb5afd7e450a925d93a91f3260cfc2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 11 Sep 2025 15:14:22 -0400
Subject: [PATCH 2/3] fix comments

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 42 +++++++++++------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5c4538c0cc56e..dfb6fd1a7b7a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9817,66 +9817,66 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                             AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
                             AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
   case Intrinsic::amdgcn_cluster_id_x:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(DAG, *MFI, VT,
                                    AMDGPUFunctionArgInfo::WORKGROUP_ID_X)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_id_y:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(DAG, *MFI, VT,
                                    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_id_z:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(DAG, *MFI, VT,
                                    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_id_x:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_id_y:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_id_z:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_flat_id:
-    return AMDGPU::isGFX1250(*Subtarget)
+    return Subtarget->hasClusters()
                ? lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4)
                : SDValue();
   case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
-    return Subtarget->hasGFX1250Insts()
+    return Subtarget->hasClusters()
                ? getPreloadedValue(
                      DAG, *MFI, VT,
                      AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID)
-               : DAG.getUNDEF(VT);
+               : DAG.getPOISON(VT);
   case Intrinsic::amdgcn_wave_id:
     return lowerWaveID(DAG, Op);
   case Intrinsic::amdgcn_lds_kernel_id: {

>From aa5ace8976a0a5bcb671c36f4e73f297a4e9ffe4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 11 Sep 2025 15:18:36 -0400
Subject: [PATCH 3/3] more fix

---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b5a41e3fbf8fb..b6b82cadcc25a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7745,46 +7745,46 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
         AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z,
         AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
   case Intrinsic::amdgcn_cluster_id_x:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
   case Intrinsic::amdgcn_cluster_id_y:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
   case Intrinsic::amdgcn_cluster_id_z:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
   case Intrinsic::amdgcn_cluster_workgroup_id_x:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X);
   case Intrinsic::amdgcn_cluster_workgroup_id_y:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y);
   case Intrinsic::amdgcn_cluster_workgroup_id_z:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z);
   case Intrinsic::amdgcn_cluster_workgroup_flat_id:
     return AMDGPU::isGFX1250(ST) &&
            legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4);
   case Intrinsic::amdgcn_cluster_workgroup_max_id_x:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X);
   case Intrinsic::amdgcn_cluster_workgroup_max_id_y:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y);
   case Intrinsic::amdgcn_cluster_workgroup_max_id_z:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z);
   case Intrinsic::amdgcn_cluster_workgroup_max_flat_id:
-    return ST.hasGFX1250Insts() &&
+    return ST.hasClusters() &&
            legalizePreloadedArgIntrin(
                MI, MRI, B,
                AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID);



More information about the llvm-commits mailing list