[llvm-branch-commits] [llvm] [AMDGPU] wip: MIR pretty printing for S_WAITCNT_FENCE_soft (PR #150391)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jul 24 02:20:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Sameer Sahasrabuddhe (ssahasra)
<details>
<summary>Changes</summary>
---
Patch is 34.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150391.diff
7 Files Affected:
- (modified) llvm/lib/CodeGen/MIRParser/MIParser.cpp (+10-15)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp (+161)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll (+36-36)
- (added) llvm/test/CodeGen/AMDGPU/fence-parameters.mir (+29)
- (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir (+12-12)
``````````diff
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 3a364d5ff0d20..c8ad286a87a35 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1850,28 +1850,25 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
return false;
}
+// The target mnemonic is an expression of the form:
+//
+// Dot(IntegerLiteral|Identifier|Dot)+
+//
+// We could be stricter like not terminating in a dot, but that's not important
+// where this is being used.
bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
const unsigned OpIdx,
MachineOperand &Dest,
const MIRFormatter &MF) {
assert(Token.is(MIToken::dot));
auto Loc = Token.location(); // record start position
- size_t Len = 1; // for "."
- lex();
-
- // Handle the case that mnemonic starts with number.
- if (Token.is(MIToken::IntegerLiteral)) {
+ size_t Len = 0;
+ while (Token.is(MIToken::IntegerLiteral) || Token.is(MIToken::dot) ||
+ Token.is(MIToken::Identifier)) {
Len += Token.range().size();
lex();
}
-
- StringRef Src;
- if (Token.is(MIToken::comma))
- Src = StringRef(Loc, Len);
- else {
- assert(Token.is(MIToken::Identifier));
- Src = StringRef(Loc, Len + Token.stringValue().size());
- }
+ StringRef Src(Loc, Len);
int64_t Val;
if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
[this](StringRef::iterator Loc, const Twine &Msg)
@@ -1879,8 +1876,6 @@ bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
return true;
Dest = MachineOperand::CreateImm(Val);
- if (!Token.is(MIToken::comma))
- lex();
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index 75e3d8c426e73..f318d6ffc1bae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -12,10 +12,135 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMIRFormatter.h"
+#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
using namespace llvm;
+bool parseAtomicOrdering(StringRef Src, unsigned &Order) {
+ Src.consume_front(".");
+ for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) {
+ if (Src == toIRString((AtomicOrdering)I)) {
+ Order = I;
+ return true;
+ }
+ }
+ Order = ~0u;
+ return false;
+}
+
+static const char *fmtScope(unsigned Scope) {
+ static const char *Names[] = {"none", "singlethread", "wavefront",
+ "workgroup", "agent", "system"};
+ return Names[Scope];
+}
+
+bool parseAtomicScope(StringRef Src, unsigned &Scope) {
+ Src.consume_front(".");
+ for (unsigned I = 0;
+ I != (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES; ++I) {
+ if (Src == fmtScope(I)) {
+ Scope = I;
+ return true;
+ }
+ }
+ Scope = ~0u;
+ return false;
+}
+
+static const char *fmtAddrSpace(unsigned Space) {
+ static const char *Names[] = {"none", "global", "lds",
+ "scratch", "gds", "other"};
+ return Names[Space];
+}
+
+bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) {
+ if (Src == "none") {
+ AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::NONE;
+ return true;
+ }
+ if (Src == "flat") {
+ AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT;
+ return true;
+ }
+ if (Src == "atomic") {
+ AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC;
+ return true;
+ }
+ if (Src == "all") {
+ AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ALL;
+ return true;
+ }
+ for (unsigned I = 1, A = 1; A <= (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+ A <<= 1, ++I) {
+ if (Src == fmtAddrSpace(I)) {
+ AddrSpace = A;
+ return true;
+ }
+ }
+ AddrSpace = ~0u;
+ return false;
+}
+
+bool parseAddrSpace(StringRef Src, unsigned &AddrSpace) {
+ Src = Src.trim();
+ Src.consume_front(".");
+ while (!Src.empty()) {
+ auto [First, Rest] = Src.split('.');
+ unsigned OneSpace;
+ if (!parseOneAddrSpace(First, OneSpace))
+ return false;
+ AddrSpace |= OneSpace;
+ Src = Rest;
+ }
+ return true;
+}
+
+static void fmtAddrSpace(raw_ostream &OS, int64_t Imm) {
+ OS << '.';
+ if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::NONE) {
+ OS << "none";
+ return;
+ }
+ if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT) {
+ OS << "flat";
+ return;
+ }
+ if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC) {
+ OS << "atomic";
+ return;
+ }
+ if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ALL) {
+ OS << "all";
+ return;
+ }
+
+ ListSeparator LS{"."};
+ auto AddrSpace = (AMDGPU::SIAtomicAddrSpace)Imm;
+ const auto LAST = (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+
+ for (unsigned A = 1, I = 1; A <= LAST; A <<= 1, ++I) {
+ if (any(AddrSpace & (AMDGPU::SIAtomicAddrSpace)A))
+ OS << LS << StringRef(fmtAddrSpace(I));
+ }
+}
+
+static void printFenceOperand(raw_ostream &OS, const MachineInstr &MI,
+ std::optional<unsigned int> OpIdx, int64_t Imm) {
+#define GET_IDX(Name) \
+ AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+ if (OpIdx == GET_IDX(Ordering)) {
+ assert(Imm <= (unsigned)AtomicOrdering::LAST);
+ OS << '.' << StringRef(toIRString((AtomicOrdering)Imm));
+ } else if (OpIdx == GET_IDX(Scope)) {
+ assert(Imm < (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES);
+ OS << '.' << StringRef(fmtScope(Imm));
+ } else if (OpIdx == GET_IDX(AddrSpace)) {
+ fmtAddrSpace(OS, Imm);
+ }
+#undef GET_IDX
+}
+
void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
std::optional<unsigned int> OpIdx, int64_t Imm) const {
@@ -24,12 +149,46 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
assert(OpIdx == 0);
printSDelayAluImm(Imm, OS);
break;
+ case AMDGPU::S_WAITCNT_FENCE_soft:
+ printFenceOperand(OS, MI, OpIdx, Imm);
+ break;
default:
MIRFormatter::printImm(OS, MI, OpIdx, Imm);
break;
}
}
+static bool
+parseFenceParameter(const unsigned int OpIdx, int64_t &Imm,
+ llvm::StringRef &Src,
+ llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) {
+#define GET_IDX(Name) \
+ AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+ if (OpIdx == (unsigned)GET_IDX(Ordering)) {
+ unsigned Order = 0;
+ if (!parseAtomicOrdering(Src, Order))
+ return ErrorCallback(Src.begin(), "Expected atomic ordering");
+ Imm = Order;
+ return false;
+ }
+ if (OpIdx == (unsigned)GET_IDX(Scope)) {
+ unsigned Scope = 0;
+ if (!parseAtomicScope(Src, Scope))
+ return ErrorCallback(Src.begin(), "Expected atomic scope");
+ Imm = Scope;
+ return false;
+ }
+ if (OpIdx == (unsigned)GET_IDX(AddrSpace)) {
+ unsigned AddrSpace = 0;
+ if (!parseAddrSpace(Src, AddrSpace))
+ return ErrorCallback(Src.begin(), "Expected address space");
+ Imm = AddrSpace;
+ return false;
+ }
+ return true;
+#undef GET_IDX
+}
+
/// Implement target specific parsing of immediate mnemonics. The mnemonic is
/// a string with a leading dot.
bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
@@ -41,6 +200,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
switch (OpCode) {
case AMDGPU::S_DELAY_ALU:
return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
+ case AMDGPU::S_WAITCNT_FENCE_soft:
+ return parseFenceParameter(OpIdx, Imm, Src, ErrorCallback);
default:
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 9d30951cac1a3..d7c2aff1d3411 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -421,13 +421,16 @@ enum CPol {
} // namespace CPol
/// The atomic synchronization scopes supported by the AMDGPU target.
+//
+// Note: Update the strings in AMDGPUMIRFormatter.cpp to match this enum.
enum class SIAtomicScope {
NONE,
SINGLETHREAD,
WAVEFRONT,
WORKGROUP,
AGENT,
- SYSTEM
+ SYSTEM,
+ NUM_SI_ATOMIC_SCOPES
};
/// The distinct address spaces supported by the AMDGPU target for
@@ -439,6 +442,7 @@ enum class SIAtomicAddrSpace {
SCRATCH = 1u << 2,
GDS = 1u << 3,
OTHER = 1u << 4,
+ LAST = OTHER,
/// The address spaces that can be accessed by a FLAT instruction.
FLAT = GLOBAL | LDS | SCRATCH,
@@ -449,7 +453,7 @@ enum class SIAtomicAddrSpace {
/// All address spaces.
ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
- LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
+ LLVM_MARK_AS_BITMASK_ENUM(/* Highest bit defined = */ LAST)
};
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
index 1f01c64de546c..6a14c2c9aae7f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
@@ -536,36 +536,36 @@ entry:
define amdgpu_kernel void @workgroup_one_as_release() #0 {
; GFX6-LABEL: name: workgroup_one_as_release
; GFX6: bb.0.entry:
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_one_as_release
; GFX8: bb.0.entry:
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_one_as_release
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: S_ENDPGM 0
;
; GFX10CU-LABEL: name: workgroup_one_as_release
; GFX10CU: bb.0.entry:
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_one_as_release
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 1015
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX11WGP-NEXT: S_ENDPGM 0
;
; GFX11CU-LABEL: name: workgroup_one_as_release
; GFX11CU: bb.0.entry:
- ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11CU-NEXT: S_ENDPGM 0
entry:
fence syncscope("workgroup-one-as") release
@@ -575,38 +575,38 @@ entry:
define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 {
; GFX6-LABEL: name: workgroup_one_as_acq_rel
; GFX6: bb.0.entry:
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_one_as_acq_rel
; GFX8: bb.0.entry:
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX10WGP-NEXT: S_ENDPGM 0
;
; GFX10CU-LABEL: name: workgroup_one_as_acq_rel
; GFX10CU: bb.0.entry:
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 1015
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX11WGP-NEXT: S_ENDPGM 0
;
; GFX11CU-LABEL: name: workgroup_one_as_acq_rel
; GFX11CU: bb.0.entry:
- ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11CU-NEXT: S_ENDPGM 0
entry:
fence syncscope("workgroup-one-as") acq_rel
@@ -616,38 +616,38 @@ entry:
define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 {
; GFX6-LABEL: name: workgroup_one_as_seq_cst
; GFX6: bb.0.entry:
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_one_as_seq_cst
; GFX8: bb.0.entry:
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX10WGP-NEXT: S_ENDPGM 0
;
; GFX10CU-LABEL: name: workgroup_one_as_seq_cst
; GFX10CU: bb.0.entry:
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 1015
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX11WGP-NEXT: S_ENDPGM 0
;
; GFX11CU-LABEL: name: workgroup_one_as_seq_cst
; GFX11CU: bb.0.entry:
- ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11CU-NEXT: S_ENDPGM 0
entry:
fence syncscope("workgroup-one-as") seq_cst
@@ -1301,39 +1301,39 @@ define amdgpu_kernel void @workgroup_release() #0 {
; GFX6-LABEL: name: workgroup_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_release
; GFX8: bb.0.entry:
; GFX8-NEXT: S_WAITCNT_soft 127
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_release
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 112
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: S_ENDPGM 0
;
; GFX10CU-LABEL: name: workgroup_release
; GFX10CU: bb.0.entry:
; GFX10CU-NEXT: S_WAITCNT_soft 49279
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_release
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 7
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX11WGP-NEXT: S_ENDPGM 0
;
; GFX11CU-LABEL: name: workgroup_release
; GFX11CU: bb.0.entry:
; GFX11CU-NEXT: S_WAITCNT_soft 64519
- ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11CU-NEXT: S_ENDPGM 0
entry:
fence syncscope("workgroup") release
@@ -1344,19 +1344,19 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
; GFX6-LABEL: name: workgroup_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_acq_rel
; GFX8: bb.0.entry:
; GFX8-NEXT: S_WAITCNT_soft 127
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_acq_rel
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 112
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX10WGP-NEXT: S_ENDPGM 0
@@ -1364,13 +1364,13 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
; GFX10CU-LABEL: name: workgroup_acq_rel
; GFX10CU: bb.0.entry:
; GFX10CU-NEXT: S_WAITCNT_soft 49279
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_acq_rel
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 7
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX11WGP-NEXT: S_ENDPGM 0
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
; GFX11CU-LABEL: name: workgroup_acq_rel
; GFX11CU: bb.0.entry:
; GFX11CU-NEXT: S_WAITCNT_soft 64519
- ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11CU-NEXT: S_ENDPGM 0
entry:
fence syncscope("workgroup") acq_rel
@@ -1389,19 +1389,19 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
; GFX6-LABEL: name: workgroup_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
- ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: workgroup_seq_cst
; GFX8: bb.0.entry:
; GFX8-NEXT: S_WAITCNT_soft 127
- ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10WGP-LABEL: name: workgroup_seq_cst
; GFX10WGP: bb.0.entry:
; GFX10WGP-NEXT: S_WAITCNT_soft 112
- ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
; GFX10WGP-NEXT: S_ENDPGM 0
@@ -1409,13 +1409,13 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
; GFX10CU-LABEL: name: workgroup_seq_cst
; GFX10CU: bb.0.entry:
; GFX10CU-NEXT: S_WAITCNT_soft 49279
- ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX10CU-NEXT: S_ENDPGM 0
;
; GFX11WGP-LABEL: name: workgroup_seq_cst
; GFX11WGP: bb.0.entry:
; GFX11WGP-NEXT: S_WAITCNT_soft 7
- ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15
+ ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
; GFX11WGP-NEXT: S_WAITCN...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150391
More information about the llvm-branch-commits
mailing list