[llvm-branch-commits] [clang] [Clang][AMDGPU] Add ``amdgcn_av("none")`` attribute for atomic expressions (PR #199622)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 26 00:52:59 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Sameer Sahasrabuddhe (ssahasra)
<details>
<summary>Changes</summary>
Add a statement attribute that suppresses MakeAvailable/MakeVisible cache operations on AMDGPU atomic and fence instructions while preserving memory ordering (waits).
The attribute takes a string argument specifying the mode; currently `"none"` is the only supported mode. The resulting atomic or fence instruction carries `!mmra !{!"amdgcn-av", !"none"}` metadata.
Assisted-By: Claude Opus 4.6
---
Patch is 28.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/199622.diff
13 Files Affected:
- (modified) clang/include/clang/Basic/Attr.td (+6)
- (modified) clang/include/clang/Basic/AttrDocs.td (+23)
- (modified) clang/include/clang/Basic/DiagnosticGroups.td (+2)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+4)
- (modified) clang/lib/CodeGen/CGAtomic.cpp (+8-1)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+42-26)
- (modified) clang/lib/CodeGen/CGStmt.cpp (+5)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+4)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+1)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+6)
- (modified) clang/lib/Sema/SemaStmtAttr.cpp (+57)
- (added) clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp (+43)
- (added) clang/test/CodeGen/AMDGPU/amdgcn-av-none-attr.cpp (+131)
``````````diff
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 70b5773f95b08..b11b3ed51efc4 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2520,6 +2520,12 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}
+def AMDGCNAV : StmtAttr, TargetSpecificAttr<TargetAMDGPU> {
+ let Spellings = [Clang<"amdgcn_av">];
+ let Args = [StringArgument<"Mode">];
+ let Documentation = [AMDGCNAVDocs];
+}
+
def BPFPreserveAccessIndex : InheritableAttr,
TargetSpecificAttr<TargetBPF> {
let Spellings = [Clang<"preserve_access_index">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 87b9053be7cb6..f2835bffd4bc5 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3543,6 +3543,29 @@ An error will be given if:
}];
}
+def AMDGCNAVDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
+ let Content = [{
+This attribute controls MakeAvailable and MakeVisible cache operations on
+AMDGPU synchronization operations. It takes a string argument specifying the
+mode.
+
+When placed on a statement containing a C/C++ atomic builtin call, the
+resulting atomic or fence instruction will carry ``!mmra !{!"amdgcn-av",
+!"<mode>"}`` metadata.
+
+The supported modes are:
+
+- ``"none"``: Skip cache writeback (on release) and cache invalidation (on
+ acquire), while preserving memory ordering (waits).
+
+.. code-block:: c++
+
+ [[clang::amdgcn_av("none")]] __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ [[clang::amdgcn_av("none")]] __atomic_fetch_add(ptr, 1, __ATOMIC_ACQ_REL);
+ }];
+}
+
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
let Content = [{
Clang supports several different calling conventions, depending on the target
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 8031f99419bdc..51787935e1280 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1968,3 +1968,5 @@ def ExperimentalOption : DiagGroup<"experimental-option">;
// Warnings about unguarded usages of AMDGPU target specific constructs
def UnguardedBuiltinUsageAMDGPU : DiagGroup<"amdgpu-unguarded-builtin-usage">;
+
+def AMDGCNAVNonAtomic : DiagGroup<"amdgcn-av-non-atomic">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index dbe6cb2c3a41c..c91b5774d0272 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -14232,6 +14232,10 @@ def note_amdgcn_unguarded_builtin_silence
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
+def warn_amdgcn_av_requires_atomic : Warning<
+ "%0 attribute only applies to atomic operations">,
+ InGroup<AMDGCNAVNonAtomic>;
+
def err_amdgcn_dmask_has_too_many_bits_set
: Error<"dmask argument cannot have more bits set than there are elements "
"in return type">;
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 270965b109943..92ee116ca8dc0 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -632,6 +632,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Load, E);
CGF.maybeAttachRangeForLoad(Load, E->getValueType(), E->getExprLoc());
auto *I = CGF.Builder.CreateStore(Load, Dest);
CGF.addInstToCurrentSourceAtom(I, Load);
@@ -649,6 +650,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
Store->setAtomic(Order, Scope);
Store->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Store, E);
CGF.addInstToCurrentSourceAtom(Store, LoadVal1);
return;
}
@@ -795,6 +797,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
CGF.Builder.CreateStore(CGF.Builder.getInt8(0), Ptr);
Store->setAtomic(Order, Scope);
Store->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Store, E);
CGF.addInstToCurrentSourceAtom(Store, nullptr);
return;
}
@@ -1586,6 +1589,7 @@ llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
Addr = castToAtomicIntPointer(Addr);
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
Load->setAtomic(AO);
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Load);
// Other decoration.
if (IsVolatile)
@@ -1753,6 +1757,7 @@ std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
// Other decoration.
Inst->setVolatile(LVal.isVolatileQualified());
Inst->setWeak(IsWeak);
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Inst);
// Okay, turn that back into the original value type.
auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
@@ -2108,8 +2113,10 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
else if (AO == llvm::AtomicOrdering::AcquireRelease)
AO = llvm::AtomicOrdering::Release;
// Initializations don't need to be atomic.
- if (!isInit)
+ if (!isInit) {
store->setAtomic(AO);
+ getTargetHooks().setTargetAtomicMetadata(*this, *store);
+ }
// Other decoration.
if (IsVolatile)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b1d727cb5e0ad..4e36d4fe1dc32 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5367,24 +5367,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *Order = EmitScalarExpr(E->getArg(0));
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+ llvm::FenceInst *Fence = nullptr;
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
break;
case 3: // memory_order_release
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
+ Fence = Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID);
break;
}
+ if (Fence)
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return RValue::get(nullptr);
}
@@ -5399,23 +5403,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(1), AcquireBB);
SI->addCase(Builder.getInt32(2), AcquireBB);
Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(llvm::AtomicOrdering::Release, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(3), ReleaseBB);
Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this,
+ *Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(4), AcqRelBB);
Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(5), SeqCstBB);
@@ -5433,40 +5443,43 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
? ScopeModel->map(Scp->getZExtValue())
: ScopeModel->map(ScopeModel->getFallBackValue());
+ llvm::FenceInst *Fence = nullptr;
switch (Ord->getZExtValue()) {
case 0: // memory_order_relaxed
default: // invalid order
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(
+ Fence = Builder.CreateFence(
llvm::AtomicOrdering::Acquire,
getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
llvm::AtomicOrdering::Acquire,
getLLVMContext()));
break;
case 3: // memory_order_release
- Builder.CreateFence(
+ Fence = Builder.CreateFence(
llvm::AtomicOrdering::Release,
getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
llvm::AtomicOrdering::Release,
getLLVMContext()));
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS,
- llvm::AtomicOrdering::AcquireRelease,
- getLLVMContext()));
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
+ getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS,
+ llvm::AtomicOrdering::AcquireRelease,
+ getLLVMContext()));
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS,
- llvm::AtomicOrdering::SequentiallyConsistent,
- getLLVMContext()));
+ Fence = Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS, llvm::AtomicOrdering::SequentiallyConsistent,
+ getLLVMContext()));
break;
}
+ if (Fence)
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return RValue::get(nullptr);
}
@@ -5525,9 +5538,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
? ScopeModel->map(Scp->getZExtValue())
: ScopeModel->map(ScopeModel->getFallBackValue());
- Builder.CreateFence(Ordering,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS, Ordering, getLLVMContext()));
+ llvm::FenceInst *Fence = Builder.CreateFence(
+ Ordering, getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS, Ordering, getLLVMContext()));
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
Builder.CreateBr(ContBB);
} else {
llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
@@ -5541,9 +5555,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
SI->addCase(Builder.getInt32(Scp), B);
Builder.SetInsertPoint(B);
- Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), ScopeModel->map(Scp),
- Ordering, getLLVMContext()));
+ llvm::FenceInst *Fence = Builder.CreateFence(
+ Ordering, getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), ScopeModel->map(Scp), Ordering,
+ getLLVMContext()));
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
Builder.CreateBr(ContBB);
}
}
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index b70667d04d1f6..e150f5da61f2e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -778,6 +778,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
bool noinline = false;
bool alwaysinline = false;
bool noconvergent = false;
+ StringRef amdgcnAVMode;
HLSLControlFlowHintAttr::Spelling flattenOrBranch =
HLSLControlFlowHintAttr::SpellingNotCalculated;
const CallExpr *musttail = nullptr;
@@ -815,6 +816,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
case attr::Atomic:
AA = cast<AtomicAttr>(A);
break;
+ case attr::AMDGCNAV:
+ amdgcnAVMode = cast<AMDGCNAVAttr>(A)->getMode();
+ break;
case attr::HLSLControlFlowHint: {
flattenOrBranch = cast<HLSLControlFlowHintAttr>(A)->getSemanticSpelling();
} break;
@@ -824,6 +828,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent);
+ SaveAndRestore save_amdgcnav(AMDGCNAVMode, amdgcnAVMode);
SaveAndRestore save_musttail(MustTailCall, musttail);
SaveAndRestore save_flattenOrBranch(HLSLControlFlowAttr, flattenOrBranch);
CGAtomicOptionsRAII AORAII(CGM, AA);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 3ce0ef1235561..fc31ec6aa8a19 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -622,6 +622,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// True if the current statement has noconvergent attribute.
bool InNoConvergentAttributedStmt = false;
+ /// The mode string from the amdgcn_av attribute on the current statement,
+ /// or empty if the attribute is not present.
+ StringRef AMDGCNAVMode;
+
/// HLSL Branch attribute.
HLSLControlFlowHintAttr::Spelling HLSLControlFlowAttr =
HLSLControlFlowHintAttr::SpellingNotCalculated;
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a88dbb71b3ddf..3816dd638fe14 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1875,6 +1875,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
FenceInst *Fence = Builder.CreateFence(AO, SSID);
if (E->getNumArgs() > 2)
AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return Fence;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0d36f166328c7..4a9ae3ab8f7f9 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -10,6 +10,7 @@
#include "TargetInfo.h"
#include "clang/AST/DeclCXX.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
using namespace clang;
@@ -633,6 +634,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
}
+ if (!CGF.AMDGCNAVMode.empty()) {
+ llvm::MMRAMetadata::appendTag(AtomicInst,
+ {{"amdgcn-av", CGF.AMDGCNAVMode}});
+ }
+
if (!RMW)
return;
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 58d5332565d10..b13b1d2256a92 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -351,6 +351,61 @@ static Attr *handleMustTailAttr(Sema &S, Stmt *St, const ParsedAttr &A,
return ::new (S.Context) MustTailAttr(S.Context, A);
}
+/// Return true if St is an atomic operation or fence builtin call.
+static bool isAtomicOp(const Stmt *St) {
+ const Expr *E = dyn_cast<Expr>(St);
+ if (!E)
+ return false;
+
+ E = E->IgnoreParenCasts();
+
+ if (isa<AtomicExpr>(E))
+ return true;
+
+ const CallExpr *CE = dyn_cast<CallExpr>(E);
+ if (!CE)
+ return false;
+
+ unsigned BuiltinID = CE->getBuiltinCallee();
+ switch (BuiltinID) {
+ case Builtin::BI__atomic_thread_fence:
+ case Builtin::BI__atomic_signal_fence:
+ case Builtin::BI__c11_atomic_thread_fence:
+ case Builtin::BI__c11_atomic_signal_fence:
+ case Builtin::BI__scoped_atomic_thread_fence:
+ return true;
+ default:
+ break;
+ }
+
+ // Check for target-specific fence builtins.
+ if (const FunctionDecl *FD = CE->getDirectCallee()) {
+ if (FD->getName() == "__builtin_amdgcn_fence")
+ return true;
+ }
+
+ return false;
+}
+
+static Attr *handleAMDGCNAVAttr(Sema &S, Stmt *St, const ParsedAttr &A,
+ SourceRange Range) {
+ StringRef Mode;
+ if (!S.checkStringLiteralArgumentAttr(A, 0, Mode))
+ return nullptr;
+
+ if (Mode != "none") {
+ S.Diag(A.getLoc(), diag::warn_attribute_type_not_supported) << A << Mode;
+ return nullptr;
+ }
+
+ if (!isAtomicOp(St)) {
+ S.Diag(A.getLoc(), diag::warn_amdgcn_av_requires_atomic) << A;
+ return nullptr;
+ }
+
+ return ::new (S.Context) AMDGCNAVAttr(S.Context, A, Mode);
+}
+
static Attr *handleLikely(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange Range) {
@@ -730,6 +785,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
return handleNoInlineAttr(S, St, A, Range);
case ParsedAttr::AT_MustTail:
return handleMustTailAttr(S, St, A, Range);
+ case ParsedAttr::AT_AMDGCNAV:
+ return handleAMDGCNAVAttr(S, St, A, Range);
case ParsedAttr::AT_Likely:
return handleLikely(S, St, A, Range);
case ParsedAttr::AT_Unlikely:
diff --git a/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp b/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
new file mode 100644
index 0000000000000..1f193a865be97
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
@@ -0,0 +1,43 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -verify -o - \
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+
+// Test that [[clang::amdgcn_av("none")]] on non-atomic statements emits a
+// warning and does NOT produce !mmra metadata.
+
+// CHECK-LABEL: define {{.*}} @_Z16test_plain_storePii(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+void test_plain_store(int *p, int val) {
+ [[clang::amdgcn_av("none")]] *p = val; // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+}
+
+// CHECK-LABEL: define {{.*}} @_Z15test_plain_callv(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+extern void foo();
+void test_plain_call() {
+ [[clang::amdgcn_av("none")]] foo(); // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+}
+
+// CHECK-LABEL: define {{.*}} @_Z18test_for_with_atomPi(
+// CHECK-NOT: !mmra
+// C...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/199622
More information about the llvm-branch-commits
mailing list