[PATCH] D110772: [clang] Allow pointers from any address space to gfx90a builtins
Anshil Gandhi via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 29 13:21:03 PDT 2021
gandhi21299 created this revision.
gandhi21299 added reviewers: yaxunl, rampitec.
gandhi21299 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Explicitly address space cast the pointer if its address space differs
from the address space of the parameter.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D110772
Files:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip
Index: clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip
===================================================================
--- /dev/null
+++ clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip
@@ -0,0 +1,32 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -munsafe-fp-atomics -target-cpu gfx90a -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+
+__device__ inline float unsafeAtomicAdd(float* addr, float value) {
+ if (__builtin_amdgcn_is_shared(
+ (const __attribute__((address_space(0))) void*)addr))
+ return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value);
+ else
+ return __builtin_amdgcn_global_atomic_fadd_f32(addr, value);
+}
+
+// CHECK-LABEL: @_Z26test_global_atomic_add_f32Pf(float addrspace(1)* %val.coerce
+// CHECK: %[[VAL:.*]] = alloca float*, align 8, addrspace(5)
+// CHECK: %[[VAL_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[VAL]] to float**
+// CHECK: %[[VAL_ADDR:.*]] = alloca float*, align 8, addrspace(5)
+// CHECK: %[[VAL_ADDR_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[VAL_ADDR]] to float**
+// CHECK: %[[RTN:.*]] = alloca float*, align 8, addrspace(5)
+// CHECK: %[[RTN_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[RTN]] to float**
+// CHECK: %[[VAL_PTR:.*]] = addrspacecast float addrspace(1)* %val.coerce to float*
+// CHECK: store float* %[[VAL_PTR]], float** %[[VAL_ASCAST]], align 8
+// CHECK: %[[ARG:.*]] = load float*, float** %val.addr.ascast, align 8
+// CHECK: %[[CALL:.*]] = call contract float @_Z15unsafeAtomicAddPff(float* %[[ARG]], float 1.000000e+00) #4
+// CHECK: %[[RTN:.*]] = load float*, float** %[[RTN_ASCAST]], align 8
+// CHECK: store float %[[CALL]], float* %[[RTN]], align 4
+__global__ void test_global_atomic_add_f32(float *val){
+ float *rtn;
+ *rtn = unsafeAtomicAdd(val, 1.0);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15755,6 +15755,13 @@
llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
+ auto *AddrParamTy = F->getArg(0)->getType();
+ auto *AddrTy = Addr->getType();
+ assert(AddrTy->isPointerTy() && "Argument is not of pointer type");
+ if (AddrParamTy->getPointerAddressSpace() !=
+ AddrTy->getPointerAddressSpace() &&
+ AddrParamTy->getPointerElementType() == AddrTy->getPointerElementType())
+ Addr = Builder.CreateAddrSpaceCast(Addr, AddrParamTy);
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
}
case AMDGPU::BI__builtin_amdgcn_read_exec: {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D110772.376019.patch
Type: text/x-patch
Size: 2969 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210929/d3998f32/attachment-0001.bin>
More information about the cfe-commits
mailing list