[llvm] [mlir] Revert "[mlir][amdgpu] Expose waitcnt bitpacking infra (#172313)" (PR #172636)
Ivan Butygin via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 17 03:42:04 PST 2025
https://github.com/Hardcode84 created https://github.com/llvm/llvm-project/pull/172636
This reverts commit 93013817afabe23a07073528481856b3507b6faf.
Revert https://github.com/llvm/llvm-project/pull/172313
Missing libraries, again
From aa4ea70358bdfc4389a44181cb3657f8543cf50f Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Wed, 17 Dec 2025 12:34:41 +0100
Subject: [PATCH] Revert "[mlir][amdgpu] Expose waitcnt bitpacking infra
(#172313)"
This reverts commit 93013817afabe23a07073528481856b3507b6faf.
---
llvm/include/llvm/Support/AMDGPUWaitcnt.h | 207 ------------------
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 181 ++++++++++++++-
.../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 53 ++++-
3 files changed, 222 insertions(+), 219 deletions(-)
delete mode 100644 llvm/include/llvm/Support/AMDGPUWaitcnt.h
diff --git a/llvm/include/llvm/Support/AMDGPUWaitcnt.h b/llvm/include/llvm/Support/AMDGPUWaitcnt.h
deleted file mode 100644
index 4d04daa2b24cb..0000000000000
--- a/llvm/include/llvm/Support/AMDGPUWaitcnt.h
+++ /dev/null
@@ -1,207 +0,0 @@
-//===---------------- AMDGPUWaitcnt.h ---------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// AMDGPU waitcnt support infrastructure
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_AMDGPUWAITCNT_H
-#define LLVM_SUPPORT_AMDGPUWAITCNT_H
-
-#include "llvm/TargetParser/TargetParser.h" // IsaVersion
-
-namespace llvm::AMDGPU {
-/// Represents the counter values to wait for in an s_waitcnt instruction.
-///
-/// Large values (including the maximum possible integer) can be used to
-/// represent "don't care" waits.
-struct LLVM_ABI Waitcnt {
- unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
- unsigned ExpCnt = ~0u;
- unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
- unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
- unsigned SampleCnt = ~0u; // gfx12+ only.
- unsigned BvhCnt = ~0u; // gfx12+ only.
- unsigned KmCnt = ~0u; // gfx12+ only.
- unsigned XCnt = ~0u; // gfx1250.
-
- Waitcnt() = default;
- // Pre-gfx12 constructor.
- Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
- : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
-
- // gfx12+ constructor.
- Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
- unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
- : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
- SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
-
- bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
-
- bool hasWaitExceptStoreCnt() const {
- return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
- SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
- }
-
- bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
-
- Waitcnt combined(const Waitcnt &Other) const {
- // Does the right thing provided self and Other are either both pre-gfx12
- // or both gfx12+.
- return Waitcnt(
- std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
- std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
- std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
- std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
- }
-
- friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
-};
-
-// The following methods are only meaningful on targets that support
-// S_WAITCNT.
-
-/// \returns Vmcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getVmcntBitMask(const IsaVersion &Version);
-
-/// \returns Expcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getExpcntBitMask(const IsaVersion &Version);
-
-/// \returns Lgkmcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getLgkmcntBitMask(const IsaVersion &Version);
-
-/// \returns Waitcnt bit mask for given isa \p Version.
-LLVM_ABI unsigned getWaitcntBitMask(const IsaVersion &Version);
-
-/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-LLVM_ABI unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
-
-/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
-/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
-/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
-/// which needs it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
-/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
-/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
-/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
-/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
-/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
-/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
-/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
-///
-LLVM_ABI void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
- unsigned &Vmcnt, unsigned &Expcnt,
- unsigned &Lgkmcnt);
-
-LLVM_ABI Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
-
-/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-LLVM_ABI unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
- unsigned Vmcnt);
-
-/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-LLVM_ABI unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
- unsigned Expcnt);
-
-/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-LLVM_ABI unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
- unsigned Lgkmcnt);
-
-/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
-/// \p Version. Should not be used on gfx12+, the instruction which needs
-/// it is deprecated
-///
-/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[2:0] = \p Expcnt (gfx11+)
-/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
-/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
-/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
-/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
-/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
-/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
-/// Waitcnt[15:10] = \p Vmcnt (gfx11)
-/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
-///
-/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
-/// isa \p Version.
-///
-LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
- unsigned Expcnt, unsigned Lgkmcnt);
-
-LLVM_ABI unsigned encodeWaitcnt(const IsaVersion &Version,
- const Waitcnt &Decoded);
-
-// The following methods are only meaningful on targets that support
-// S_WAIT_*CNT, introduced with gfx12.
-
-/// \returns Loadcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support LOADcnt
-LLVM_ABI unsigned getLoadcntBitMask(const IsaVersion &Version);
-
-/// \returns Samplecnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support SAMPLEcnt
-LLVM_ABI unsigned getSamplecntBitMask(const IsaVersion &Version);
-
-/// \returns Bvhcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support BVHcnt
-LLVM_ABI unsigned getBvhcntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support DScnt
-LLVM_ABI unsigned getDscntBitMask(const IsaVersion &Version);
-
-/// \returns Dscnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support KMcnt
-LLVM_ABI unsigned getKmcntBitMask(const IsaVersion &Version);
-
-/// \returns Xcnt bit mask for given isa \p Version.
-/// Returns 0 for versions that do not support Xcnt.
-LLVM_ABI unsigned getXcntBitMask(const IsaVersion &Version);
-
-/// \return STOREcnt or VScnt bit mask for given isa \p Version.
-/// returns 0 for versions that do not support STOREcnt or VScnt.
-/// STOREcnt and VScnt are the same counter, the name used
-/// depends on the ISA version.
-LLVM_ABI unsigned getStorecntBitMask(const IsaVersion &Version);
-
-// The following are only meaningful on targets that support
-// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
-
-/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
-/// isa \p Version.
-LLVM_ABI Waitcnt decodeLoadcntDscnt(const IsaVersion &Version,
- unsigned LoadcntDscnt);
-
-/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
-/// isa \p Version.
-LLVM_ABI Waitcnt decodeStorecntDscnt(const IsaVersion &Version,
- unsigned StorecntDscnt);
-
-/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
-/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
-/// \p Version.
-LLVM_ABI unsigned encodeLoadcntDscnt(const IsaVersion &Version,
- const Waitcnt &Decoded);
-
-/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
-/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
-/// \p Version.
-LLVM_ABI unsigned encodeStorecntDscnt(const IsaVersion &Version,
- const Waitcnt &Decoded);
-} // end namespace llvm::AMDGPU
-
-#endif // LLVM_SUPPORT_AMDGPUWAITCNT_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 95ee1626a8623..75db58a292c13 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -14,7 +14,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/AMDGPUWaitcnt.h"
#include "llvm/Support/Alignment.h"
#include <array>
#include <functional>
@@ -1077,6 +1076,186 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
/// Checks if \p Val is inside \p MD, a !range-like metadata.
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+ unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
+ unsigned ExpCnt = ~0u;
+ unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
+ unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
+ unsigned SampleCnt = ~0u; // gfx12+ only.
+ unsigned BvhCnt = ~0u; // gfx12+ only.
+ unsigned KmCnt = ~0u; // gfx12+ only.
+ unsigned XCnt = ~0u; // gfx1250.
+
+ Waitcnt() = default;
+ // Pre-gfx12 constructor.
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+ : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
+
+ // gfx12+ constructor.
+ Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
+ unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
+ : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
+ SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}
+
+ bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
+
+ bool hasWaitExceptStoreCnt() const {
+ return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
+ SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
+ }
+
+ bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
+
+ Waitcnt combined(const Waitcnt &Other) const {
+ // Does the right thing provided self and Other are either both pre-gfx12
+ // or both gfx12+.
+ return Waitcnt(
+ std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
+ std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
+ std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
+ std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
+};
+
+// The following methods are only meaningful on targets that support
+// S_WAITCNT.
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+unsigned getVmcntBitMask(const IsaVersion &Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+unsigned getExpcntBitMask(const IsaVersion &Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaVersion &Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
+/// which needs it is deprecated.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
+/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
+/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
+/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
+/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
+/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
+///
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
+ unsigned &Expcnt, unsigned &Lgkmcnt);
+
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
+
+/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version. Should not be used on gfx12+; the instruction which needs
+/// it is deprecated.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+/// Waitcnt[2:0] = \p Expcnt (gfx11+)
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
+/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
+/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
+/// Waitcnt[15:10] = \p Vmcnt (gfx11)
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+///
+unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
+ unsigned Expcnt, unsigned Lgkmcnt);
+
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
+// The following methods are only meaningful on targets that support
+// S_WAIT_*CNT, introduced with gfx12.
+
+/// \returns Loadcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support LOADcnt
+unsigned getLoadcntBitMask(const IsaVersion &Version);
+
+/// \returns Samplecnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support SAMPLEcnt
+unsigned getSamplecntBitMask(const IsaVersion &Version);
+
+/// \returns Bvhcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support BVHcnt
+unsigned getBvhcntBitMask(const IsaVersion &Version);
+
+/// \returns Dscnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support DScnt
+unsigned getDscntBitMask(const IsaVersion &Version);
+
+/// \returns Kmcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support KMcnt
+unsigned getKmcntBitMask(const IsaVersion &Version);
+
+/// \returns Xcnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support Xcnt.
+unsigned getXcntBitMask(const IsaVersion &Version);
+
+/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
+/// Returns 0 for versions that do not support STOREcnt or VScnt.
+/// STOREcnt and VScnt are the same counter, the name used
+/// depends on the ISA version.
+unsigned getStorecntBitMask(const IsaVersion &Version);
+
+// The following are only meaningful on targets that support
+// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
+
+/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
+/// isa \p Version.
+Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
+
+/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
+/// isa \p Version.
+Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
+
+/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
+/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
+/// \p Version.
+unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
+/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
+/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
+/// \p Version.
+unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
namespace Hwreg {
using HwregId = EncodingField<5, 0>;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 66d68ba9cb990..73d5376f970ae 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -28,7 +28,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/AMDGPUWaitcnt.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>
@@ -436,18 +435,50 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
}
};
+// TODO: The AMDGPU backend already has all this bitpacking logic; we should
+// move it to some common place.
+/// Vmcnt, Expcnt and Lgkmcnt are decoded as follows:
+/// Vmcnt = Waitcnt[3:0] (pre-gfx9)
+/// Vmcnt = Waitcnt[15:14,3:0] (gfx9,10)
+/// Vmcnt = Waitcnt[15:10] (gfx11)
+/// Expcnt = Waitcnt[6:4] (pre-gfx11)
+/// Expcnt = Waitcnt[2:0] (gfx11)
+/// Lgkmcnt = Waitcnt[11:8] (pre-gfx10)
+/// Lgkmcnt = Waitcnt[13:8] (gfx10)
+/// Lgkmcnt = Waitcnt[9:4] (gfx11)
static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
unsigned expcnt, unsigned lgkmcnt) {
- if (chipset.majorVersion >= 12)
- return failure();
-
- llvm::AMDGPU::IsaVersion isaVersion{
- chipset.majorVersion, chipset.minorVersion, chipset.steppingVersion};
- vmcnt = std::min(vmcnt, llvm::AMDGPU::getVmcntBitMask(isaVersion));
- expcnt = std::min(expcnt, llvm::AMDGPU::getExpcntBitMask(isaVersion));
- lgkmcnt = std::min(lgkmcnt, llvm::AMDGPU::getLgkmcntBitMask(isaVersion));
-
- return llvm::AMDGPU::encodeWaitcnt(isaVersion, vmcnt, expcnt, lgkmcnt);
+ if (chipset.majorVersion < 9) {
+ vmcnt = std::min(15u, vmcnt);
+ expcnt = std::min(7u, expcnt);
+ lgkmcnt = std::min(15u, lgkmcnt);
+ return vmcnt | (expcnt << 4) | (lgkmcnt << 8);
+ }
+ if (chipset.majorVersion == 9) {
+ vmcnt = std::min(63u, vmcnt);
+ expcnt = std::min(7u, expcnt);
+ lgkmcnt = std::min(15u, lgkmcnt);
+ unsigned lowBits = vmcnt & 0xF;
+ unsigned highBits = (vmcnt >> 4) << 14;
+ unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
+ return lowBits | highBits | otherCnts;
+ }
+ if (chipset.majorVersion == 10) {
+ vmcnt = std::min(63u, vmcnt);
+ expcnt = std::min(7u, expcnt);
+ lgkmcnt = std::min(63u, lgkmcnt);
+ unsigned lowBits = vmcnt & 0xF;
+ unsigned highBits = (vmcnt >> 4) << 14;
+ unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
+ return lowBits | highBits | otherCnts;
+ }
+ if (chipset.majorVersion == 11) {
+ vmcnt = std::min(63u, vmcnt);
+ expcnt = std::min(7u, expcnt);
+ lgkmcnt = std::min(63u, lgkmcnt);
+ return (vmcnt << 10) | expcnt | (lgkmcnt << 4);
+ }
+ return failure();
}
struct MemoryCounterWaitOpLowering
More information about the llvm-commits
mailing list