[clang] [llvm] [AMDGPU][gfx1250] Add 128B cooperative atomics (PR #156418)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 3 09:04:36 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
Changes:
- Add clang built-ins + sema/codegen (a usage sketch follows this list)
- Add IR intrinsic + verifier
- Add DAG/GlobalISel codegen for the intrinsics
- Add lowering in SIMemoryLegalizer using an MMO flag
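For context, a rough usage sketch of the new built-ins, mirroring the CodeGen tests included in the patch (the `NxMB` suffixes presumably denote N lanes cooperatively accessing M bytes each, 128B in total):

```c
// Usage sketch only, based on the tests below; not part of this patch.
typedef int v4i __attribute__((ext_vector_type(4)));

void store_example(global int *gaddr, int val) {
  // The pointer must be global or generic, the ordering a constant
  // (acquire/acq_rel are rejected for stores), and the scope a string literal.
  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
}

v4i load_example(global v4i *gaddr) {
  // Loads reject release/acq_rel orderings.
  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_ACQUIRE, "workgroup");
}
```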
---
Patch is 156.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156418.diff
24 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+10)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+3-1)
- (modified) clang/include/clang/Sema/SemaAMDGPU.h (+2)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+43)
- (modified) clang/lib/Sema/SemaAMDGPU.cpp (+53)
- (added) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl (+104)
- (added) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl (+66)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+30)
- (modified) llvm/include/llvm/Target/TargetSelectionDAG.td (+13)
- (modified) llvm/lib/IR/Verifier.cpp (+22)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+14)
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+4)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+75)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+4)
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+41-3)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll (+521)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-basic.ll (+49)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll (+479)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-system.ll (+533)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-wavefront.ll (+479)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll (+479)
- (added) llvm/test/Verifier/AMDGPU/llvm.amdgcn.cooperative.atomic.ll (+47)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 7b7dbf7043099..0f9c9720a1199 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -831,5 +831,15 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbI
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
+// GFX12.5 128B cooperative atomics
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B, "ii*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B, "V2iV2i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 0f3aa9aea215f..3038763bac31b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10692,7 +10692,7 @@ def warn_dangling_reference_captured_by_unknown : Warning<
// Diagnostics based on the Lifetime safety analysis.
def warn_lifetime_safety_loan_expires_permissive : Warning<
- "object whose reference is captured does not live long enough">,
+ "object whose reference is captured does not live long enough">,
InGroup<LifetimeSafetyPermissive>, DefaultIgnore;
def warn_lifetime_safety_loan_expires_strict : Warning<
"object whose reference is captured may not live long enough">,
@@ -13603,4 +13603,6 @@ def warn_acc_var_referenced_lacks_op
// AMDGCN builtins diagnostics
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
+
+def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
} // end of sema component.
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index d62c9bb65fadb..bac812a9d4fcf 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -26,6 +26,8 @@ class SemaAMDGPU : public SemaBase {
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
+
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 4c1953e4b8e34..87a46287c4022 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -701,6 +701,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitBuiltinWithOneOverloadedType<5>(*this, E,
Intrinsic::amdgcn_load_to_lds);
}
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
+ IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
+ break;
+ }
+
+ LLVMContext &Ctx = CGM.getLLVMContext();
+ SmallVector<Value *, 5> Args;
+ // The last argument is an MD string.
+ const unsigned ScopeArg = E->getNumArgs() - 1;
+ for (unsigned i = 0; i != ScopeArg; ++i)
+ Args.push_back(EmitScalarExpr(E->getArg(i)));
+ StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
+ ->getString();
+ llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
+ Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
+ // Intrinsic is typed based on the pointer AS. Pointer is always the first
+ // argument.
+ llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
+ return Builder.CreateCall(F, {Args});
+ }
case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
{llvm::Type::getInt64Ty(getLLVMContext())});
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 1913bb830ccd0..baba503239e9f 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -15,6 +15,7 @@
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/Sema.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cstdint>
@@ -100,6 +101,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+ return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+ case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
+ return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
default:
return false;
}
@@ -145,6 +154,50 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return false;
}
+bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
+ bool Fail = false;
+
+ // First argument is a global or generic pointer.
+ Expr *PtrArg = TheCall->getArg(0);
+ QualType PtrTy = PtrArg->getType()->getPointeeType();
+ unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
+ if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
+ AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
+ Fail = true;
+ Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as)
+ << PtrArg->getSourceRange();
+ }
+
+ // Check atomic ordering
+ Expr *AtomicOrdArg = TheCall->getArg(IsStore ? 2 : 1);
+ Expr::EvalResult AtomicOrdArgRes;
+ if (!AtomicOrdArg->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
+ llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
+ auto Ord =
+ llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());
+
+ // The atomic ordering can never be acq_rel, and cannot be acquire for
+ // stores or release for loads.
+ if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
+ (Ord == llvm::AtomicOrderingCABI::acq_rel) ||
+ Ord == (IsStore ? llvm::AtomicOrderingCABI::acquire
+ : llvm::AtomicOrderingCABI::release)) {
+ return Diag(AtomicOrdArg->getBeginLoc(),
+ diag::warn_atomic_op_has_invalid_memory_order)
+ << 0 << AtomicOrdArg->getSourceRange();
+ }
+
+ // Last argument is a string literal
+ Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1);
+ if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) {
+ Fail = true;
+ Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
+ << Arg->getSourceRange();
+ }
+
+ return Fail;
+}
+
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs) {
assert(NumDataArgs <= 2);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl
new file mode 100644
index 0000000000000..8768f2f367654
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl
@@ -0,0 +1,104 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[VAL]], i32 0, metadata [[META4:![0-9]+]])
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local i32 @test_amdgcn_cooperative_atomic_load_32x4B(
+// CHECK-SAME: ptr noundef readonly captures(none) [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr [[ADDR]], i32 0, metadata [[META5:![0-9]+]])
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_cooperative_atomic_load_32x4B(int* addr)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_32x4B(addr, __ATOMIC_RELAXED, "");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_16x8B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <2 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p1(ptr addrspace(1) [[GADDR]], <2 x i32> [[VAL]], i32 0, metadata [[META5]])
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, "");
+}
+
+// CHECK-LABEL: define dso_local <2 x i32> @test_amdgcn_cooperative_atomic_load_16x8B(
+// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META6:![0-9]+]])
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, "workgroup");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_8x16B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <4 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p1(ptr addrspace(1) [[GADDR]], <4 x i32> [[VAL]], i32 0, metadata [[META7:![0-9]+]])
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, "singlethread");
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_amdgcn_cooperative_atomic_load_8x16B(
+// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META4]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_truncated(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VAL]] to i32
+// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B_truncated(global int* gaddr, long val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_extended(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i8 noundef signext [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[VAL]] to i32
+// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B_extended(global int* gaddr, char val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+//.
+// CHECK: [[META4]] = !{!"agent"}
+// CHECK: [[META5]] = !{!""}
+// CHECK: [[META6]] = !{!"workgroup"}
+// CHECK: [[META7]] = !{!"singlethread"}
+//.
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl
new file mode 100644
index 0000000000000..0ab9a5a43e718
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl
@@ -0,0 +1,66 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val, const char* syncscope)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+int test_amdgcn_cooperative_atomic_load_32x4B(global int* gaddr, const char* syncscope)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_32x4B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val, const char* syncscope)
+{
+ __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr, const char* syncscope)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val, const char* syncscope)
+{
+ __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr, const char* syncscope)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B_release(global v4i* gaddr)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELEASE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B_acq_rel(global v4i* gaddr)
+{
+ return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__sharedptr(local int* addr, int val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_RELAXED, ""); // expected-error {{cooperative atomic requires a global or generic pointer}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__ordering_not_imm(local int* addr, int ord, int val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, ord, ""); // expected-error {{argument to '__builtin_amdgcn_cooperative_atomic_store_32x4B' must be a constant integer}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__acquire(int* addr, int ord, int val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQUIRE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__acq_rel(int* addr, int ord, int val)
+{
+ __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3c5ac99512a64..afb12a910b1ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3769,6 +3769,36 @@ def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4"
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
[IntrNoMem, IntrSpeculatable]>;
+class AMDGPUCooperativeAtomicStore<LLVMType Ty> : Intrinsic <
+ [],
+ [llvm_anyptr_ty, // pointer to store to
+ Ty, // value to store
+ llvm_i32_ty, // C ABI Atomic Ordering ID
+ llvm_metadata_ty], // syncscope
+ [IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>,
+ IntrNoCallback, IntrNoFree, IntrConvergent],
+ "",
+ [SDNPMemOperand, SDNPMayStore]
+>;
+
+class AMDGPUCooperativeAtomicLoad<LLVMType Ty> : Intrinsic <
+ [Ty],
+ [llvm_anyptr_ty, // pointer to load from
+ llvm_i32_ty, // C ABI Atomic Ordering ID
+ llvm_metadata_ty], // syncscope
+ [IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ IntrNoCallback, IntrNoFree, IntrConvergent],
+ "",
+ [SDNPMemOperand, SDNPMayLoad]
+>;
+
+def int_amdgcn_cooperative_atomic_load_32x4B : AMDGPUCooperativeAtomicLoad<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_store_32x4B : AMDGPUCooperativeAtomicStore<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_load_16x8B : AMDGPUCooperativeAtomicLoad<llvm_v2i32_ty>;
+def int_amdgcn_cooperative_atomic_store_16x8B : ...
[truncated]
``````````
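For reference, the IR shape these built-ins lower to, copied from the CHECK lines in the CodeGen test above: the intrinsic is overloaded on the pointer address space (`.p1` for global, `.p0` for generic), the C ABI ordering travels as an immediate `i32` (0 is relaxed), and the syncscope as an `MDString` operand.

```llvm
; Emitted call shapes, per the CHECK lines above.
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) %gaddr, i32 %val, i32 0, metadata !0)
%v = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p1(ptr addrspace(1) %gaddr, i32 0, metadata !0)

!0 = !{!"agent"}
```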
https://github.com/llvm/llvm-project/pull/156418
More information about the cfe-commits mailing list