[clang] 675cefb - [AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics
Saiyedul Islam via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 10:03:18 PDT 2020
Author: Saiyedul Islam
Date: 2020-06-09T17:02:58Z
New Revision: 675cefbf60270f59057972e33365a09590fb3694
URL: https://github.com/llvm/llvm-project/commit/675cefbf60270f59057972e33365a09590fb3694
DIFF: https://github.com/llvm/llvm-project/commit/675cefbf60270f59057972e33365a09590fb3694.diff
LOG: [AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics
Summary:
__builtin_amdgcn_atomic_inc32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_inc64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)
The first and second arguments are passed through transparently to the amdgcn
atomic inc/dec intrinsic. The fifth argument of the intrinsic is set to true
if the first argument of the builtin is a volatile pointer. The third argument
of the builtin is one of the memory-ordering specifiers __ATOMIC_ACQUIRE,
__ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST, following C++11
memory model semantics; it is mapped to the corresponding LLVM atomic memory
ordering for the atomic inc/dec intrinsic using the Clang atomic C ABI. The
fourth argument is an AMDGPU-specific synchronization scope, passed as a
string.
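For reference, a minimal device-side usage sketch (mirroring the added CodeGen
test; the wrapper function below is illustrative and not part of this patch):

  __attribute__((device)) int bump(int *ptr) {
    // Lowers to a call to llvm.amdgcn.atomic.inc.i32 with the ordering encoded
    // as i32 7 (seq_cst), the "workgroup" sync scope encoded as i32 2, and the
    // i1 volatile flag set to false for this non-volatile pointer.
    return __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
  }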
Reviewers: arsenm, sameerds, JonChesterfield, jdoerfert
Reviewed By: arsenm, sameerds
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, kerbowa, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D80804
Added:
clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Sema/SemaChecking.cpp
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 28379142b05a..9add10c64962 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -60,6 +60,12 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc64, "WiWiD*WiUicC*", "n")
+
+BUILTIN(__builtin_amdgcn_atomic_dec32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_dec64, "WiWiD*WiUicC*", "n")
+
// FIXME: Need to disallow constant address space.
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bfc78ce94892..f0092e2fa7ec 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14301,8 +14301,49 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
}
} // namespace
+// Processes the memory ordering and memory scope arguments of various
+// amdgcn builtins.
+// \p Order takes a C++11-compatible memory-ordering specifier and converts
+// it into LLVM's memory ordering specifier using the atomic C ABI, and writes
+// it to \p AO. \p Scope takes a const char * and converts it into an
+// AMDGCN-specific SyncScopeID and writes it to \p SSID.
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID) {
+ if (isa<llvm::ConstantInt>(Order)) {
+ int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+
+ // Map C11/C++11 memory ordering to LLVM memory ordering
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ AO = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::release:
+ AO = llvm::AtomicOrdering::Release;
+ break;
+ case llvm::AtomicOrderingCABI::acq_rel:
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::relaxed:
+ break;
+ }
+
+ StringRef scp;
+ llvm::getConstantStringInfo(Scope, scp);
+ SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+ return true;
+ }
+ return false;
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ llvm::SyncScope::ID SSID;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -14507,38 +14548,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_fence: {
- llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
- llvm::SyncScope::ID SSID;
- Value *Order = EmitScalarExpr(E->getArg(0));
- Value *Scope = EmitScalarExpr(E->getArg(1));
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1)), AO, SSID))
+ return Builder.CreateFence(AO, SSID);
+ LLVM_FALLTHROUGH;
+ }
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+ unsigned BuiltinAtomicOp;
+ llvm::Type *ResultType = ConvertType(E->getType());
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+ break;
+ }
- // Map C11/C++11 memory ordering to LLVM memory ordering
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- AO = llvm::AtomicOrdering::Acquire;
- break;
- case llvm::AtomicOrderingCABI::release:
- AO = llvm::AtomicOrdering::Release;
- break;
- case llvm::AtomicOrderingCABI::acq_rel:
- AO = llvm::AtomicOrdering::AcquireRelease;
- break;
- case llvm::AtomicOrderingCABI::seq_cst:
- AO = llvm::AtomicOrdering::SequentiallyConsistent;
- break;
- case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
- case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
- break;
- }
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
- StringRef scp;
- llvm::getConstantStringInfo(Scope, scp);
- SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+ llvm::Function *F =
+ CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
- return Builder.CreateFence(AO, SSID);
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+
+ // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
+ // scope as unsigned values.
+ Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+ Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+
+ QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+ bool Volatile =
+ PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+ Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+
+ return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
}
LLVM_FALLTHROUGH;
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c9fcaf9d0f5e..48b2368133e2 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3988,6 +3988,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E);
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID);
private:
enum class MSVCIntrin;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0e451d6e5867..85126e029244 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3070,41 +3070,56 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *TheCall) {
+ // Position of the memory order and scope arguments in the builtin.
+ unsigned OrderIndex, ScopeIndex;
switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_fence: {
- ExprResult Arg = TheCall->getArg(0);
- auto ArgExpr = Arg.get();
- Expr::EvalResult ArgResult;
-
- if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
- return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
- << ArgExpr->getType();
- int ord = ArgResult.Val.getInt().getZExtValue();
-
- // Check valididty of memory ordering as per C11 / C++11's memody model.
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- case llvm::AtomicOrderingCABI::release:
- case llvm::AtomicOrderingCABI::acq_rel:
- case llvm::AtomicOrderingCABI::seq_cst:
- break;
- default: {
- return Diag(ArgExpr->getBeginLoc(),
- diag::warn_atomic_op_has_invalid_memory_order)
- << ArgExpr->getSourceRange();
- }
- }
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+ OrderIndex = 2;
+ ScopeIndex = 3;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_fence:
+ OrderIndex = 0;
+ ScopeIndex = 1;
+ break;
+ default:
+ return false;
+ }
- Arg = TheCall->getArg(1);
- ArgExpr = Arg.get();
- Expr::EvalResult ArgResult1;
- // Check that sync scope is a constant literal
- if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
- Context))
- return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
- << ArgExpr->getType();
- } break;
+ ExprResult Arg = TheCall->getArg(OrderIndex);
+ auto ArgExpr = Arg.get();
+ Expr::EvalResult ArgResult;
+
+ if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
+ return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+ << ArgExpr->getType();
+ int ord = ArgResult.Val.getInt().getZExtValue();
+
+ // Check validity of the memory ordering as per the C11 / C++11 memory model.
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ case llvm::AtomicOrderingCABI::release:
+ case llvm::AtomicOrderingCABI::acq_rel:
+ case llvm::AtomicOrderingCABI::seq_cst:
+ break;
+ default: {
+ return Diag(ArgExpr->getBeginLoc(),
+ diag::warn_atomic_op_has_invalid_memory_order)
+ << ArgExpr->getSourceRange();
+ }
}
+
+ Arg = TheCall->getArg(ScopeIndex);
+ ArgExpr = Arg.get();
+ Expr::EvalResult ArgResult1;
+ // Check that sync scope is a constant literal
+ if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
+ Context))
+ return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
+ << ArgExpr->getType();
+
return false;
}
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
new file mode 100644
index 000000000000..535c3d754954
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -0,0 +1,253 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+
+__attribute__((device)) void test_non_volatile_parameter32(int *ptr) {
+ // CHECK-LABEL: test_non_volatile_parameter32
+ int res;
+ // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+ // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+ // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load i32, i32* %1, align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load i32, i32* %5, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_non_volatile_parameter64(__INT64_TYPE__ *ptr) {
+ // CHECK-LABEL: test_non_volatile_parameter64
+ __INT64_TYPE__ res;
+ // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+ // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+ // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load i64, i64* %1, align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load i64, i64* %5, align 8
+ // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter32(volatile int *ptr) {
+ // CHECK-LABEL: test_volatile_parameter32
+ int res;
+ // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+ // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+ // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter64(volatile __INT64_TYPE__ *ptr) {
+ // CHECK-LABEL: test_volatile_parameter64
+ __INT64_TYPE__ res;
+ // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+ // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+ // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8
+ // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared32() {
+ // CHECK-LABEL: test_shared32
+ __attribute__((shared)) int val;
+
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared64() {
+ // CHECK-LABEL: test_shared64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+int global_val32;
+__attribute__((device)) void test_global32() {
+ // CHECK-LABEL: test_global32
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__INT64_TYPE__ global_val64;
+__attribute__((device)) void test_global64() {
+ // CHECK-LABEL: test_global64
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) int cval32;
+__attribute__((device)) void test_constant32() {
+ // CHECK-LABEL: test_constant32
+ int local_val;
+
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4
+ local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4
+ local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) __INT64_TYPE__ cval64;
+__attribute__((device)) void test_constant64() {
+ // CHECK-LABEL: test_constant64
+ __INT64_TYPE__ local_val;
+
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* %local_val.ascast, align 8
+ local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8
+ local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order32() {
+ // CHECK-LABEL: test_order32
+ __attribute__((shared)) int val;
+
+ // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %0, i32 4, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+ // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %2, i32 5, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+ // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %4, i32 6, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+ // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %6, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order64() {
+ // CHECK-LABEL: test_order64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %0, i32 4, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+ // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %2, i32 5, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+ // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %4, i32 6, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+ // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %6, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_scope32() {
+ // CHECK-LABEL: test_scope32
+ __attribute__((shared)) int val;
+
+ // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false)
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+ // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
+
+__attribute__((device)) void test_scope64() {
+ // CHECK-LABEL: test_scope64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false)
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+ // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
diff --git a/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
new file mode 100644
index 000000000000..c08b00bac0cf
--- /dev/null
+++ b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -x hip -fcuda-is-device -o - -emit-llvm -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_host() {
+ int val;
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "");
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index dbe2900b600b..e2d9082f8f72 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -144,3 +144,51 @@ void test_s_setreg(int x, int y) {
__builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
__builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
}
+
+void test_atomic_inc32() {
+ int val = 17;
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc32(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_inc64() {
+ __INT64_TYPE__ val = 17;
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc64(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec32() {
+ int val = 17;
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec32(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec64() {
+ __INT64_TYPE__ val = 17;
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec64(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}