[llvm] [mlir] Reland [mlir][amdgpu] Expose waitcnt bitpacking infra (PR #172678)

Wed Dec 17 07:19:32 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-mlir-gpu

Author: Ivan Butygin (Hardcode84)

<details>
<summary>Changes</summary>

Reland https://github.com/llvm/llvm-project/pull/172313.

Add missing lib

---

Patch is 21.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172678.diff


4 Files Affected:

- (added) llvm/include/llvm/Support/AMDGPUWaitcnt.h (+207) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1-180) 
- (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+11-42) 
- (modified) mlir/lib/Conversion/AMDGPUToROCDL/CMakeLists.txt (+1) 


``````````diff

diff --git a/llvm/include/llvm/Support/AMDGPUWaitcnt.h b/llvm/include/llvm/Support/AMDGPUWaitcnt.h
new file mode 100644
index 0000000000000..4d04daa2b24cb
--- /dev/null
+++ b/llvm/include/llvm/Support/AMDGPUWaitcnt.h
@@ -0,0 +1,207 @@
+//===---------------- AMDGPUWaitcnt.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU waitcnt support infrastructure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AMDGPUWAITCNT_H
+#define LLVM_SUPPORT_AMDGPUWAITCNT_H
+
+#include "llvm/TargetParser/TargetParser.h" // IsaVersion
+
+namespace llvm::AMDGPU {
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct LLVM_ABI Waitcnt {
+  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
+  unsigned ExpCnt = ~0u;
+  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
+  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
+  unsigned SampleCnt = ~0u; // gfx12+ only.
+  unsigned BvhCnt = ~0u;    // gfx12+ only.
+  unsigned KmCnt = ~0u;     // gfx12+ only.
+  unsigned XCnt = ~0u;      // gfx1250.
+
+  Waitcnt() = default;
+  // Pre-gfx12 constructor.
+  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
+
+  // gfx12+ constructor.
+  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
+          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
+      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
+        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
+
+  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
+
+  bool hasWaitExceptStoreCnt() const {
+    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
+           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
+  }
+
+  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
+
+  Waitcnt combined(const Waitcnt &Other) const {
+    // Does the right thing provided self and Other are either both pre-gfx12
+    // or both gfx12+.
+    return Waitcnt(
+        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
+        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
+        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
+        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
+  }
+
+  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
+};
+
+// The following methods are only meaningful on targets that support
+// S_WAITCNT.
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
+/// which needs it is deprecated.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
+///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
+///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
+///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
+///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
+///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
+///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
+///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
+///
+LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+                            unsigned &Vmcnt, unsigned &Expcnt,
+                            unsigned &Lgkmcnt);
+
+LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                              unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+                               unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                                unsigned Lgkmcnt);
+
+/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version. Should not be used on gfx12+; the instruction which needs
+/// it is deprecated.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
+///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
+///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
+///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
+///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
+///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
+///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
+///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
+///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+///
+LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
+                                unsigned Expcnt, unsigned Lgkmcnt);
+
+LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version,
+                                const Waitcnt &Decoded);
+
+// The following methods are only meaningful on targets that support
+// S_WAIT_*CNT, introduced with gfx12.
+
+/// \returns Loadcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support LOADcnt
+LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version);
+
+/// \returns Samplecnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support SAMPLEcnt
+LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version);
+
+/// \returns Bvhcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support BVHcnt
+LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version);
+
+/// \returns Dscnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support DScnt
+LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version);
+
+/// \returns Kmcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support KMcnt.
+LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version);
+
+/// \returns Xcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support Xcnt.
+LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version);
+
+/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support STOREcnt or VScnt.
+/// STOREcnt and VScnt are the same counter, the name used
+/// depends on the ISA version.
+LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version);
+
+// The following are only meaningful on targets that support
+// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
+
+/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
+/// isa \p Version.
+LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version,
+                                    unsigned LoadcntDscnt);
+
+/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
+/// isa \p Version.
+LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version,
+                                     unsigned StorecntDscnt);
+
+/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
+/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
+/// \p Version.
+LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version,
+                                     const Waitcnt &Decoded);
+
+/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
+/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
+/// \p Version.
+LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version,
+                                      const Waitcnt &Decoded);
+} // end namespace llvm::AMDGPU
+
+#endif // LLVM_SUPPORT_AMDGPUWAITCNT_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 75db58a292c13..95ee1626a8623 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -14,6 +14,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/AMDGPUWaitcnt.h"
 #include "llvm/Support/Alignment.h"
 #include <array>
 #include <functional>
@@ -1076,186 +1077,6 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
 /// Checks if \p Val is inside \p MD, a !range-like metadata.
 bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
 
-/// Represents the counter values to wait for in an s_waitcnt instruction.
-///
-/// Large values (including the maximum possible integer) can be used to
-/// represent "don't care" waits.
-struct Waitcnt {
-  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
-  unsigned ExpCnt = ~0u;
-  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
-  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
-  unsigned SampleCnt = ~0u; // gfx12+ only.
-  unsigned BvhCnt = ~0u;    // gfx12+ only.
-  unsigned KmCnt = ~0u;     // gfx12+ only.
-  unsigned XCnt = ~0u;      // gfx1250.
-
-  Waitcnt() = default;
-  // Pre-gfx12 constructor.
-  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
-      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
-
-  // gfx12+ constructor.
-  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
-          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
-      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
-        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
-
-  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
-
-  bool hasWaitExceptStoreCnt() const {
-    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
-           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
-  }
-
-  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
-
-  Waitcnt combined(const Waitcnt &Other) const {
-    // Does the right thing provided self and Other are either both pre-gfx12
-    // or both gfx12+.
-    return Waitcnt(
-        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
-        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
-        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
-        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
-  }
-
-  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
-};
-
-// The following methods are only meaningful on targets that support
-// S_WAITCNT.
-
-/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaVersion &Version);
-
-/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaVersion &Version);
-
-/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaVersion &Version);
-
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaVersion &Version);
-
-/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
-/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
-/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
-/// which needs it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
-///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
-///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
-///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
-///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
-///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
-///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
-///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
-///
-void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
-                   unsigned &Expcnt, unsigned &Lgkmcnt);
-
-Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
-
-/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
-                     unsigned Vmcnt);
-
-/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
-                      unsigned Expcnt);
-
-/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
-                       unsigned Lgkmcnt);
-
-/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
-/// \p Version. Should not be used on gfx12+, the instruction which needs
-/// it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
-///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
-///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
-///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
-///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
-///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
-///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
-///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
-///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
-///
-/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
-/// isa \p Version.
-///
-unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
-                       unsigned Expcnt, unsigned Lgkmcnt);
-
-unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
-
-// The following methods are only meaningful on targets that support
-// S_WAIT_*CNT, introduced with gfx12.
-
-/// \returns Loadcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support LOADcnt
-unsigned getLoadcntBitMask(const IsaVersion &Version);
-
-/// \returns Samplecnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support SAMPLEcnt
-unsigned getSamplecntBitMask(const IsaVersion &Version);
-
-/// \returns Bvhcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support BVHcnt
-unsigned getBvhcntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support DScnt
-unsigned getDscntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support KMcnt
-unsigned getKmcntBitMask(const IsaVersion &Version);
-
-/// \returns Xcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support Xcnt.
-unsigned getXcntBitMask(const IsaVersion &Version);
-
-/// \return STOREcnt or VScnt bit mask for given isa \p Version.
-/// returns 0 for versions that do not support STOREcnt or VScnt.
-/// STOREcnt and VScnt are the same counter, the name used
-/// depends on the ISA version.
-unsigned getStorecntBitMask(const IsaVersion &Version);
-
-// The following are only meaningful on targets that support
-// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
-
-/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
-/// isa \p Version.
-Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
-
-/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
-/// isa \p Version.
-Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
-
-/// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
-/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
-/// \p Version.
-unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
-
-/// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
-/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
-/// \p Version.
-unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
-
 namespace Hwreg {
 
 using HwregId = EncodingField<5, 0>;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 541bb02d79eae..5fdfdfdf7f263 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -28,6 +28,7 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/AMDGPUWaitcnt.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <optional>
@@ -435,50 +436,18 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
   }
 };
 
-// TODO: AMDGPU backend already have all this bitpacking logic, we should move
-// it to some common place.
-///  Vmcnt, Expcnt and Lgkmcnt are decoded as follows:
-///     Vmcnt = Waitcnt[3:0]        (pre-gfx9)
-///     Vmcnt = Waitcnt[15:14,3:0]  (gfx9,10)
-///     Vmcnt = Waitcnt[15:10]      (gfx11)
-///     Expcnt = Waitcnt[6:4]       (pre-gfx11)
-///     Expcnt = Waitcnt[2:0]       (gfx11)
-///     Lgkmcnt = Waitcnt[11:8]     (pre-gfx10)
-///     Lgkmcnt = Waitcnt[13:8]     (gfx10)
-///     Lgkmcnt = Waitcnt[9:4]      (gfx11)
 static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
                                          unsigned expcnt, unsigned lgkmcnt) {
-  if (chipset.majorVersion < 9) {
-    vmcnt = std::min(15u, vmcnt);
-    expcnt = std::min(7u, expcnt);
-    lgkmcnt = std::min(15u, lgkmcnt);
-    return vmcnt | (expcnt << 4) | (lgkmcnt << 8);
-  }
-  if (chipset.majorVersion == 9) {
-    vmcnt = std::min(63u, vmcnt);
-    expcnt = std::min(7u, expcnt);
-    lgkmcnt = std::min(15u, lgkmcnt);
-    unsigned lowBits = vmcnt & 0xF;
-    unsigned highBits = (vmcnt >> 4) << 14;
-    unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
-    return lowBits | highBits | otherCnts;
-  }
-  if (chipset.majorVersion == 10) {
-    vmcnt = std::min(63u, vmcnt);
-    expcnt = std::min(7u, expcnt);
-    lgkmcnt = std::min(63u, lgkmcnt);
-    unsigned lowBits = vmcnt & 0xF;
-    unsigned highBits = (vmcnt >> 4) << 14;
-    unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
-    return lowBits | highBits | otherCnts;
-  }
-  if (chipset.majorVersion == 11) {
-    vmcnt = std::min(63u, vmcnt);
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/172678