[llvm] [mlir] Revert "[mlir][amdgpu] Expose waitcnt bitpacking infra (#172313)" (PR #172636)

Wed Dec 17 03:42:04 PST 2025

https://github.com/Hardcode84 created https://github.com/llvm/llvm-project/pull/172636

This reverts commit 93013817afabe23a07073528481856b3507b6faf.

Revert https://github.com/llvm/llvm-project/pull/172313

Missing libraries, again

>From aa4ea70358bdfc4389a44181cb3657f8543cf50f Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Wed, 17 Dec 2025 12:34:41 +0100
Subject: [PATCH] Revert "[mlir][amdgpu] Expose waitcnt bitpacking infra
 (#172313)"

This reverts commit 93013817afabe23a07073528481856b3507b6faf.
---
 llvm/include/llvm/Support/AMDGPUWaitcnt.h     | 207 ------------------
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 181 ++++++++++++++-
 .../AMDGPUToROCDL/AMDGPUToROCDL.cpp           |  53 ++++-
 3 files changed, 222 insertions(+), 219 deletions(-)
 delete mode 100644 llvm/include/llvm/Support/AMDGPUWaitcnt.h

diff --git a/llvm/include/llvm/Support/AMDGPUWaitcnt.h b/llvm/include/llvm/Support/AMDGPUWaitcnt.h
deleted file mode 100644
index 4d04daa2b24cb..0000000000000
--- a/llvm/include/llvm/Support/AMDGPUWaitcnt.h
+++ /dev/null
@@ -1,207 +0,0 @@
-//===---------------- AMDGPUWaitcnt.h ---------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// AMDGPU waitcnt support infrastructure
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_AMDGPUWAITCNT_H
-#define LLVM_SUPPORT_AMDGPUWAITCNT_H
-
-#include "llvm/TargetParser/TargetParser.h" // IsaVersion
-
-namespace llvm::AMDGPU {
-/// Represents the counter values to wait for in an s_waitcnt instruction.
-///
-/// Large values (including the maximum possible integer) can be used to
-/// represent "don't care" waits.
-struct LLVM_ABI Waitcnt {
-  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
-  unsigned ExpCnt = ~0u;
-  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
-  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
-  unsigned SampleCnt = ~0u; // gfx12+ only.
-  unsigned BvhCnt = ~0u;    // gfx12+ only.
-  unsigned KmCnt = ~0u;     // gfx12+ only.
-  unsigned XCnt = ~0u;      // gfx1250.
-
-  Waitcnt() = default;
-  // Pre-gfx12 constructor.
-  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
-      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
-
-  // gfx12+ constructor.
-  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
-          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
-      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
-        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
-
-  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
-
-  bool hasWaitExceptStoreCnt() const {
-    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
-           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
-  }
-
-  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
-
-  Waitcnt combined(const Waitcnt &Other) const {
-    // Does the right thing provided self and Other are either both pre-gfx12
-    // or both gfx12+.
-    return Waitcnt(
-        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
-        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
-        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
-        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
-  }
-
-  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
-};
-
-// The following methods are only meaningful on targets that support
-// S_WAITCNT.
-
-/// \returns Vmcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version);
-
-/// \returns Expcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version);
-
-/// \returns Lgkmcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version);
-
-/// \returns Waitcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version);
-
-/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
-/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
-/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
-/// which needs it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
-///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
-///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
-///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
-///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
-///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
-///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
-///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
-///
-LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
-                            unsigned &Vmcnt, unsigned &Expcnt,
-                            unsigned &Lgkmcnt);
-
-LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
-
-/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
-                              unsigned Vmcnt);
-
-/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
-                               unsigned Expcnt);
-
-/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
-                                unsigned Lgkmcnt);
-
-/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
-/// \p Version. Should not be used on gfx12+, the instruction which needs
-/// it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
-///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
-///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
-///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
-///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
-///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
-///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
-///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
-///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
-///
-/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
-/// isa \p Version.
-///
-LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
-                                unsigned Expcnt, unsigned Lgkmcnt);
-
-LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version,
-                                const Waitcnt &Decoded);
-
-// The following methods are only meaningful on targets that support
-// S_WAIT_*CNT, introduced with gfx12.
-
-/// \returns Loadcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support LOADcnt
-LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version);
-
-/// \returns Samplecnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support SAMPLEcnt
-LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version);
-
-/// \returns Bvhcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support BVHcnt
-LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support DScnt
-LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support KMcnt
-LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version);
-
-/// \returns Xcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support Xcnt.
-LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version);
-
-/// \return STOREcnt or VScnt bit mask for given isa \p Version.
-/// returns 0 for versions that do not support STOREcnt or VScnt.
-/// STOREcnt and VScnt are the same counter, the name used
-/// depends on the ISA version.
-LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version);
-
-// The following are only meaningful on targets that support
-// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
-
-/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
-/// isa \p Version.
-LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version,
-                                    unsigned LoadcntDscnt);
-
-/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
-/// isa \p Version.
-LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version,
-                                     unsigned StorecntDscnt);
-
-/// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
-/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
-/// \p Version.
-LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version,
-                                     const Waitcnt &Decoded);
-
-/// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
-/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
-/// \p Version.
-LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version,
-                                      const Waitcnt &Decoded);
-} // end namespace llvm::AMDGPU
-
-#endif // LLVM_SUPPORT_AMDGPUWAITCNT_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 95ee1626a8623..75db58a292c13 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -14,7 +14,6 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/AMDGPUWaitcnt.h"
 #include "llvm/Support/Alignment.h"
 #include <array>
 #include <functional>
@@ -1077,6 +1076,186 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
 /// Checks if \p Val is inside \p MD, a !range-like metadata.
 bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
 
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
+  unsigned ExpCnt = ~0u;
+  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
+  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
+  unsigned SampleCnt = ~0u; // gfx12+ only.
+  unsigned BvhCnt = ~0u;    // gfx12+ only.
+  unsigned KmCnt = ~0u;     // gfx12+ only.
+  unsigned XCnt = ~0u;      // gfx1250.
+
+  Waitcnt() = default;
+  // Pre-gfx12 constructor.
+  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
+
+  // gfx12+ constructor.
+  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
+          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
+      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
+        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
+
+  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
+
+  bool hasWaitExceptStoreCnt() const {
+    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
+           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
+  }
+
+  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
+
+  Waitcnt combined(const Waitcnt &Other) const {
+    // Does the right thing provided self and Other are either both pre-gfx12
+    // or both gfx12+.
+    return Waitcnt(
+        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
+        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
+        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
+        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
+  }
+
+  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
+};
+
+// The following methods are only meaningful on targets that support
+// S_WAITCNT.
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+unsigned getVmcntBitMask(const IsaVersion &Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+unsigned getExpcntBitMask(const IsaVersion &Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaVersion &Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
+/// which needs it is deprecated
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
+///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
+///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
+///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
+///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
+///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
+///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
+///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
+///
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
+                   unsigned &Expcnt, unsigned &Lgkmcnt);
+
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                     unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+                      unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                       unsigned Lgkmcnt);
+
+/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version. Should not be used on gfx12+, the instruction which needs
+/// it is deprecated
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
+///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
+///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
+///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
+///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
+///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
+///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
+///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
+///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+///
+unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
+                       unsigned Expcnt, unsigned Lgkmcnt);
+
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
+// The following methods are only meaningful on targets that support
+// S_WAIT_*CNT, introduced with gfx12.
+
+/// \returns Loadcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support LOADcnt
+unsigned getLoadcntBitMask(const IsaVersion &Version);
+
+/// \returns Samplecnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support SAMPLEcnt
+unsigned getSamplecntBitMask(const IsaVersion &Version);
+
+/// \returns Bvhcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support BVHcnt
+unsigned getBvhcntBitMask(const IsaVersion &Version);
+
+/// \returns Dscnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support DScnt
+unsigned getDscntBitMask(const IsaVersion &Version);
+
+/// \returns Dscnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support KMcnt
+unsigned getKmcntBitMask(const IsaVersion &Version);
+
+/// \returns Xcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support Xcnt.
+unsigned getXcntBitMask(const IsaVersion &Version);
+
+/// \return STOREcnt or VScnt bit mask for given isa \p Version.
+/// returns 0 for versions that do not support STOREcnt or VScnt.
+/// STOREcnt and VScnt are the same counter, the name used
+/// depends on the ISA version.
+unsigned getStorecntBitMask(const IsaVersion &Version);
+
+// The following are only meaningful on targets that support
+// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
+
+/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
+/// isa \p Version.
+Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
+
+/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
+/// isa \p Version.
+Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
+
+/// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
+/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
+/// \p Version.
+unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
+/// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
+/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
+/// \p Version.
+unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
 namespace Hwreg {
 
 using HwregId = EncodingField<5, 0>;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 66d68ba9cb990..73d5376f970ae 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -28,7 +28,6 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/AMDGPUWaitcnt.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <optional>
@@ -436,18 +435,50 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
   }
 };
 
+// TODO: AMDGPU backend already have all this bitpacking logic, we should move
+// it to some common place.
+///  Vmcnt, Expcnt and Lgkmcnt are decoded as follows:
+///     Vmcnt = Waitcnt[3:0]        (pre-gfx9)
+///     Vmcnt = Waitcnt[15:14,3:0]  (gfx9,10)
+///     Vmcnt = Waitcnt[15:10]      (gfx11)
+///     Expcnt = Waitcnt[6:4]       (pre-gfx11)
+///     Expcnt = Waitcnt[2:0]       (gfx11)
+///     Lgkmcnt = Waitcnt[11:8]     (pre-gfx10)
+///     Lgkmcnt = Waitcnt[13:8]     (gfx10)
+///     Lgkmcnt = Waitcnt[9:4]      (gfx11)
 static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
                                          unsigned expcnt, unsigned lgkmcnt) {
-  if (chipset.majorVersion >= 12)
-    return failure();
-
-  llvm::AMDGPU::IsaVersion isaVersion{
-      chipset.majorVersion, chipset.minorVersion, chipset.steppingVersion};
-  vmcnt = std::min(vmcnt, llvm::AMDGPU::getVmcntBitMask(isaVersion));
-  expcnt = std::min(expcnt, llvm::AMDGPU::getExpcntBitMask(isaVersion));
-  lgkmcnt = std::min(lgkmcnt, llvm::AMDGPU::getLgkmcntBitMask(isaVersion));
-
-  return llvm::AMDGPU::encodeWaitcnt(isaVersion, vmcnt, expcnt, lgkmcnt);
+  if (chipset.majorVersion < 9) {
+    vmcnt = std::min(15u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(15u, lgkmcnt);
+    return vmcnt | (expcnt << 4) | (lgkmcnt << 8);
+  }
+  if (chipset.majorVersion == 9) {
+    vmcnt = std::min(63u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(15u, lgkmcnt);
+    unsigned lowBits = vmcnt & 0xF;
+    unsigned highBits = (vmcnt >> 4) << 14;
+    unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
+    return lowBits | highBits | otherCnts;
+  }
+  if (chipset.majorVersion == 10) {
+    vmcnt = std::min(63u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(63u, lgkmcnt);
+    unsigned lowBits = vmcnt & 0xF;
+    unsigned highBits = (vmcnt >> 4) << 14;
+    unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
+    return lowBits | highBits | otherCnts;
+  }
+  if (chipset.majorVersion == 11) {
+    vmcnt = std::min(63u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(63u, lgkmcnt);
+    return (vmcnt << 10) | expcnt | (lgkmcnt << 4);
+  }
+  return failure();
 }
 
 struct MemoryCounterWaitOpLowering