[clang] 694fd10 - [HIP] Fix device malloc/free
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 11 11:49:51 PST 2022
Author: Yaxun (Sam) Liu
Date: 2022-01-11T14:49:34-05:00
New Revision: 694fd10659eb32399b0c4fb31f2d062040d800b4
URL: https://github.com/llvm/llvm-project/commit/694fd10659eb32399b0c4fb31f2d062040d800b4
DIFF: https://github.com/llvm/llvm-project/commit/694fd10659eb32399b0c4fb31f2d062040d800b4.diff
LOG: [HIP] Fix device malloc/free
ROCm 4.5 device library introduced __ockl_dm_alloc and __ockl_dm_dealloc
for supporting device side malloc/free.
This patch redefines device malloc/free to use these functions.
It also fixes a bug in the wrapper header which incorrectly defines free
with return type void* instead of void.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D116967
Added:
Modified:
clang/lib/Headers/__clang_hip_runtime_wrapper.h
clang/test/Headers/hip-header.hip
Removed:
################################################################################
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index 73021d256cbae..10cec58ed12f1 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -50,6 +50,9 @@ extern "C" {
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
+#if __has_include("hip/hip_version.h")
+#include "hip/hip_version.h"
+#endif // __has_include("hip/hip_version.h")
#else
typedef __SIZE_TYPE__ size_t;
// Define macros which are needed to declare HIP device API's without standard
@@ -74,25 +77,35 @@ typedef __SIZE_TYPE__ __hip_size_t;
extern "C" {
#endif //__cplusplus
+#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
+extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
+__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
+ return (void *) __ockl_dm_alloc(__size);
+}
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+ __ockl_dm_dealloc((unsigned long long)__ptr);
+}
+#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return __hip_malloc(__size);
}
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
- return __hip_free(__ptr);
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+ __hip_free(__ptr);
}
#else
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
- return (void *)0;
}
#endif
+#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip
index aa7abcedb7ae4..e8a86a2db4313 100644
--- a/clang/test/Headers/hip-header.hip
+++ b/clang/test/Headers/hip-header.hip
@@ -4,7 +4,7 @@
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
-// RUN: -D__HIPCC_RTC__ | FileCheck %s
+// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
@@ -25,6 +25,13 @@
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN: -internal-isystem %S/Inputs/include \
+// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
+// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
// expected-no-diagnostics
@@ -120,14 +127,18 @@ __device__ double test_isnan() {
#include <cstdlib>
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
// CHECK: call {{.*}}i8* @malloc(i64
-// CHECK: define weak {{.*}}i8* @malloc(i64
+// CHECK-LABEL: define weak {{.*}}i8* @malloc(i64
+// MALLOC: call i64 @__ockl_dm_alloc
+// NOMALLOC: call void @llvm.trap
__device__ void test_malloc(void *a) {
a = malloc(42);
}
// CHECK-LABEL: define{{.*}}@_Z9test_free
-// CHECK: call {{.*}}i8* @free(i8*
-// CHECK: define weak {{.*}}i8* @free(i8*
+// CHECK: call {{.*}}void @free(i8*
+// CHECK-LABEL: define weak {{.*}}void @free(i8*
+// MALLOC: call void @__ockl_dm_dealloc
+// NOMALLOC: call void @llvm.trap
__device__ void test_free(void *a) {
free(a);
}
More information about the cfe-commits
mailing list