r240669 - [CUDA] Implemented __nvvm_atom_*_gen_* builtins.
Artem Belevich
tra at google.com
Thu Jun 25 11:29:43 PDT 2015
Author: tra
Date: Thu Jun 25 13:29:42 2015
New Revision: 240669
URL: http://llvm.org/viewvc/llvm-project?rev=240669&view=rev
Log:
[CUDA] Implemented __nvvm_atom_*_gen_* builtins.
Integer variants are implemented as atomicrmw or cmpxchg instructions.
Atomic add for floating point (__nvvm_atom_add_gen_f()) is implemented
as a call to an overloaded @llvm.nvvm.atomic.load.add.f32.* LLVM
intrinsic.
Differential Revision: http://reviews.llvm.org/D10666
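
As a quick illustration (not part of the commit), here is a minimal CUDA sketch
of what these builtins look like on the user's side, assuming device-side
compilation with -x cuda -fcuda-is-device as in the updated test below. The
function name atom_sketch is made up for the example; the IR mentioned in the
comments mirrors the FileCheck patterns added to builtins-nvptx.c:

  #define __device__ __attribute__((device))  // stand-in so no CUDA headers are needed

  __device__ void atom_sketch(int *ip, int i, float *fp, float f) {
    // Integer add lowers to an atomicrmw instruction, roughly:
    //   atomicrmw add i32* %ip, i32 %i seq_cst
    __nvvm_atom_add_gen_i(ip, i);
    // Compare-and-swap lowers to a cmpxchg instruction, roughly:
    //   cmpxchg i32* %ip, i32 0, i32 %i seq_cst seq_cst
    __nvvm_atom_cas_gen_i(ip, 0, i);
    // Floating-point add has no atomicrmw form, so it becomes a call to
    //   @llvm.nvvm.atomic.load.add.f32.p0f32
    __nvvm_atom_add_gen_f(fp, f);
  }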
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGen/builtins-nvptx.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=240669&r1=240668&r2=240669&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 25 13:29:42 2015
@@ -82,9 +82,9 @@ static Value *EmitFromInt(CodeGenFunctio
/// Utility to insert an atomic instruction based on Instrinsic::ID
/// and the expression node.
-static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E) {
+static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst::BinOp Kind,
+ const CallExpr *E) {
QualType T = E->getType();
assert(E->getArg(0)->getType()->isPointerType());
assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -108,8 +108,13 @@ static RValue EmitBinaryAtomic(CodeGenFu
llvm::Value *Result =
CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
llvm::SequentiallyConsistent);
- Result = EmitFromInt(CGF, Result, T, ValueType);
- return RValue::get(Result);
+ return EmitFromInt(CGF, Result, T, ValueType);
+}
+
+static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst::BinOp Kind,
+ const CallExpr *E) {
+ return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
/// Utility to insert an atomic instruction based Instrinsic::ID and
@@ -151,6 +156,47 @@ static RValue EmitBinaryAtomicPost(CodeG
return RValue::get(Result);
}
+/// @brief Utility to insert an atomic cmpxchg instruction.
+///
+/// @param CGF The current codegen function.
+/// @param E Builtin call expression to convert to cmpxchg.
+/// arg0 - address to operate on
+/// arg1 - value to compare with
+/// arg2 - new value
+/// @param ReturnBool Specifies whether to return success flag of
+/// cmpxchg result or the old value.
+///
+/// @returns result of cmpxchg, according to ReturnBool
+static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
+ bool ReturnBool) {
+ QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
+ llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
+ unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+
+ llvm::IntegerType *IntType = llvm::IntegerType::get(
+ CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
+ llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
+
+ Value *Args[3];
+ Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
+ Args[1] = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Args[1]->getType();
+ Args[1] = EmitToInt(CGF, Args[1], T, IntType);
+ Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
+
+ Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
+ llvm::SequentiallyConsistent,
+ llvm::SequentiallyConsistent);
+ if (ReturnBool)
+ // Extract boolean success flag and zext it to int.
+ return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
+ CGF.ConvertType(E->getType()));
+ else
+ // Extract old value and emit it using the same type as compare value.
+ return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
+ ValueType);
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -1057,58 +1103,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(
case Builtin::BI__sync_val_compare_and_swap_2:
case Builtin::BI__sync_val_compare_and_swap_4:
case Builtin::BI__sync_val_compare_and_swap_8:
- case Builtin::BI__sync_val_compare_and_swap_16: {
- QualType T = E->getType();
- llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
-
- llvm::IntegerType *IntType =
- llvm::IntegerType::get(getLLVMContext(),
- getContext().getTypeSize(T));
- llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
-
- Value *Args[3];
- Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
- Args[1] = EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(*this, Args[1], T, IntType);
- Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
-
- Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
- llvm::SequentiallyConsistent,
- llvm::SequentiallyConsistent);
- Result = Builder.CreateExtractValue(Result, 0);
- Result = EmitFromInt(*this, Result, T, ValueType);
- return RValue::get(Result);
- }
+ case Builtin::BI__sync_val_compare_and_swap_16:
+ return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
case Builtin::BI__sync_bool_compare_and_swap_1:
case Builtin::BI__sync_bool_compare_and_swap_2:
case Builtin::BI__sync_bool_compare_and_swap_4:
case Builtin::BI__sync_bool_compare_and_swap_8:
- case Builtin::BI__sync_bool_compare_and_swap_16: {
- QualType T = E->getArg(1)->getType();
- llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
-
- llvm::IntegerType *IntType =
- llvm::IntegerType::get(getLLVMContext(),
- getContext().getTypeSize(T));
- llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
-
- Value *Args[3];
- Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
- Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
- Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
-
- Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
- llvm::SequentiallyConsistent,
- llvm::SequentiallyConsistent);
- Value *Result = Builder.CreateExtractValue(Pair, 1);
- // zext bool to int.
- Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
- return RValue::get(Result);
- }
+ case Builtin::BI__sync_bool_compare_and_swap_16:
+ return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
case Builtin::BI__sync_swap_1:
case Builtin::BI__sync_swap_2:
@@ -1880,6 +1883,9 @@ Value *CodeGenFunction::EmitTargetBuilti
return EmitAMDGPUBuiltinExpr(BuiltinID, E);
case llvm::Triple::systemz:
return EmitSystemZBuiltinExpr(BuiltinID, E);
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ return EmitNVPTXBuiltinExpr(BuiltinID, E);
default:
return nullptr;
}
@@ -6899,5 +6905,74 @@ Value *CodeGenFunction::EmitSystemZBuilt
default:
return nullptr;
+ }
+}
+
+Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ switch (BuiltinID) {
+ case NVPTX::BI__nvvm_atom_add_gen_i:
+ case NVPTX::BI__nvvm_atom_add_gen_l:
+ case NVPTX::BI__nvvm_atom_add_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
+
+ case NVPTX::BI__nvvm_atom_sub_gen_i:
+ case NVPTX::BI__nvvm_atom_sub_gen_l:
+ case NVPTX::BI__nvvm_atom_sub_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
+
+ case NVPTX::BI__nvvm_atom_and_gen_i:
+ case NVPTX::BI__nvvm_atom_and_gen_l:
+ case NVPTX::BI__nvvm_atom_and_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
+
+ case NVPTX::BI__nvvm_atom_or_gen_i:
+ case NVPTX::BI__nvvm_atom_or_gen_l:
+ case NVPTX::BI__nvvm_atom_or_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
+
+ case NVPTX::BI__nvvm_atom_xor_gen_i:
+ case NVPTX::BI__nvvm_atom_xor_gen_l:
+ case NVPTX::BI__nvvm_atom_xor_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
+
+ case NVPTX::BI__nvvm_atom_xchg_gen_i:
+ case NVPTX::BI__nvvm_atom_xchg_gen_l:
+ case NVPTX::BI__nvvm_atom_xchg_gen_ll:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
+
+ case NVPTX::BI__nvvm_atom_max_gen_i:
+ case NVPTX::BI__nvvm_atom_max_gen_l:
+ case NVPTX::BI__nvvm_atom_max_gen_ll:
+ case NVPTX::BI__nvvm_atom_max_gen_ui:
+ case NVPTX::BI__nvvm_atom_max_gen_ul:
+ case NVPTX::BI__nvvm_atom_max_gen_ull:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
+
+ case NVPTX::BI__nvvm_atom_min_gen_i:
+ case NVPTX::BI__nvvm_atom_min_gen_l:
+ case NVPTX::BI__nvvm_atom_min_gen_ll:
+ case NVPTX::BI__nvvm_atom_min_gen_ui:
+ case NVPTX::BI__nvvm_atom_min_gen_ul:
+ case NVPTX::BI__nvvm_atom_min_gen_ull:
+ return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
+
+ case NVPTX::BI__nvvm_atom_cas_gen_i:
+ case NVPTX::BI__nvvm_atom_cas_gen_l:
+ case NVPTX::BI__nvvm_atom_cas_gen_ll:
+ return MakeAtomicCmpXchgValue(*this, E, true);
+
+ case NVPTX::BI__nvvm_atom_add_gen_f: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ // atomicrmw only deals with integer arguments so we need to use
+ // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
+ Value *FnALAF32 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
+ return Builder.CreateCall(FnALAF32, {Ptr, Val});
+ }
+
+ default:
+ return nullptr;
}
}
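
A note on the ReturnBool parameter of the MakeAtomicCmpXchgValue helper added
above: it selects between the two __sync compare-and-swap flavors that were
previously open-coded in EmitBuiltinExpr. A minimal illustration (not from the
commit; behavior follows the standard GCC/Clang __sync builtin semantics):

  int v = 1;
  // ReturnBool == false path: yields the value *ptr held before the operation.
  int prev = __sync_val_compare_and_swap(&v, 1, 2);   // prev == 1, v == 2
  // ReturnBool == true path: yields whether the exchange actually happened.
  int ok = __sync_bool_compare_and_swap(&v, 2, 3);    // ok == 1, v == 3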
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=240669&r1=240668&r2=240669&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Thu Jun 25 13:29:42 2015
@@ -2600,6 +2600,7 @@ public:
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
Modified: cfe/trunk/test/CodeGen/builtins-nvptx.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-nvptx.c?rev=240669&r1=240668&r2=240669&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-nvptx.c (original)
+++ cfe/trunk/test/CodeGen/builtins-nvptx.c Thu Jun 25 13:29:42 2015
@@ -1,8 +1,13 @@
// REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
-int read_tid() {
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+__device__ int read_tid() {
// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
@@ -18,7 +23,7 @@ int read_tid() {
}
-int read_ntid() {
+__device__ int read_ntid() {
// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
@@ -34,7 +39,7 @@ int read_ntid() {
}
-int read_ctaid() {
+__device__ int read_ctaid() {
// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
@@ -50,7 +55,7 @@ int read_ctaid() {
}
-int read_nctaid() {
+__device__ int read_nctaid() {
// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
@@ -66,7 +71,7 @@ int read_nctaid() {
}
-int read_ids() {
+__device__ int read_ids() {
// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
@@ -86,7 +91,7 @@ int read_ids() {
}
-int read_lanemasks() {
+__device__ int read_lanemasks() {
// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
@@ -104,8 +109,7 @@ int read_lanemasks() {
}
-
-long read_clocks() {
+__device__ long read_clocks() {
// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()
@@ -117,7 +121,7 @@ long read_clocks() {
}
-int read_pms() {
+__device__ int read_pms() {
// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
@@ -133,7 +137,7 @@ int read_pms() {
}
-void sync() {
+__device__ void sync() {
// CHECK: call void @llvm.ptx.bar.sync(i32 0)
@@ -146,7 +150,7 @@ void sync() {
// The idea is not to test all intrinsics, just that Clang is recognizing the
// builtins defined in BuiltinsNVPTX.def
-void nvvm_math(float f1, float f2, double d1, double d2) {
+__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call float @llvm.nvvm.fmax.f
float t1 = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
@@ -176,3 +180,95 @@ void nvvm_math(float f1, float f2, doubl
// CHECK: call void @llvm.nvvm.barrier0()
__nvvm_bar0();
}
+
+__device__ int di;
+__shared__ int si;
+__device__ long dl;
+__shared__ long sl;
+__device__ long long dll;
+__shared__ long long sll;
+
+// Check for atomic intrinsics
+// CHECK-LABEL: nvvm_atom
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+ long long *llp, long long ll) {
+ // CHECK: atomicrmw add
+ __nvvm_atom_add_gen_i(ip, i);
+ // CHECK: atomicrmw add
+ __nvvm_atom_add_gen_l(&dl, l);
+ // CHECK: atomicrmw add
+ __nvvm_atom_add_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw sub
+ __nvvm_atom_sub_gen_i(ip, i);
+ // CHECK: atomicrmw sub
+ __nvvm_atom_sub_gen_l(&dl, l);
+ // CHECK: atomicrmw sub
+ __nvvm_atom_sub_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw and
+ __nvvm_atom_and_gen_i(ip, i);
+ // CHECK: atomicrmw and
+ __nvvm_atom_and_gen_l(&dl, l);
+ // CHECK: atomicrmw and
+ __nvvm_atom_and_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw or
+ __nvvm_atom_or_gen_i(ip, i);
+ // CHECK: atomicrmw or
+ __nvvm_atom_or_gen_l(&dl, l);
+ // CHECK: atomicrmw or
+ __nvvm_atom_or_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw xor
+ __nvvm_atom_xor_gen_i(ip, i);
+ // CHECK: atomicrmw xor
+ __nvvm_atom_xor_gen_l(&dl, l);
+ // CHECK: atomicrmw xor
+ __nvvm_atom_xor_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw xchg
+ __nvvm_atom_xchg_gen_i(ip, i);
+ // CHECK: atomicrmw xchg
+ __nvvm_atom_xchg_gen_l(&dl, l);
+ // CHECK: atomicrmw xchg
+ __nvvm_atom_xchg_gen_ll(&sll, ll);
+
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_i(ip, i);
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_l(&dl, l);
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_ll(&sll, ll);
+ // CHECK: atomicrmw max
+ __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);
+
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_i(ip, i);
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_l(&dl, l);
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_ll(&sll, ll);
+ // CHECK: atomicrmw min
+ __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
+
+ // CHECK: cmpxchg
+ __nvvm_atom_cas_gen_i(ip, 0, i);
+ // CHECK: cmpxchg
+ __nvvm_atom_cas_gen_l(&dl, 0, l);
+ // CHECK: cmpxchg
+ __nvvm_atom_cas_gen_ll(&sll, 0, ll);
+
+ // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
+ __nvvm_atom_add_gen_f(fp, f);
+
+ // CHECK: ret
+}