[clang] 8193b29 - Revert "[HIP] Allow std::malloc in device function"

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 14 13:33:58 PDT 2023


Author: Yaxun (Sam) Liu
Date: 2023-06-14T16:33:30-04:00
New Revision: 8193b291cefa732dd37fc917bd2921385e639d21

URL: https://github.com/llvm/llvm-project/commit/8193b291cefa732dd37fc917bd2921385e639d21
DIFF: https://github.com/llvm/llvm-project/commit/8193b291cefa732dd37fc917bd2921385e639d21.diff

LOG: Revert "[HIP] Allow std::malloc in device function"

This reverts commit f5033c37025db46df95a7859d7189d09b5e3433e.

revert this patch since it causes regressions for Tensile. A
reduced test case is:

int main()
{
    std::shared_ptr<float> a;
    a = std::shared_ptr<float>(
        (float*)std::malloc(sizeof(float) * 100),
        std::free
    );
    return 0;
}

Will fix the issue then re-commit.

Fixes: SWDEV-405317

Added: 
    

Modified: 
    clang/lib/Headers/__clang_hip_runtime_wrapper.h
    clang/test/Headers/Inputs/include/cstdlib
    clang/test/Headers/Inputs/include/math.h
    clang/test/Headers/hip-header.hip

Removed: 
    clang/test/Headers/Inputs/include/sstream
    clang/test/Headers/Inputs/include/stdexcept


################################################################################
diff  --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index ed1550038e63e..e8817073efdbc 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -47,9 +47,28 @@ extern "C" {
 #endif //__cplusplus
 
 #if !defined(__HIPCC_RTC__)
+#include <cmath>
+#include <cstdlib>
+#include <stdlib.h>
 #if __has_include("hip/hip_version.h")
 #include "hip/hip_version.h"
 #endif // __has_include("hip/hip_version.h")
+#else
+typedef __SIZE_TYPE__ size_t;
+// Define macros which are needed to declare HIP device API's without standard
+// C/C++ headers. This is for readability so that these API's can be written
+// the same way as non-hipRTC use case. These macros need to be popped so that
+// they do not pollute users' name space.
+#pragma push_macro("NULL")
+#pragma push_macro("uint32_t")
+#pragma push_macro("uint64_t")
+#pragma push_macro("CHAR_BIT")
+#pragma push_macro("INT_MAX")
+#define NULL (void *)0
+#define uint32_t __UINT32_TYPE__
+#define uint64_t __UINT64_TYPE__
+#define CHAR_BIT __CHAR_BIT__
+#define INT_MAX __INTMAX_MAX__
 #endif // __HIPCC_RTC__
 
 typedef __SIZE_TYPE__ __hip_size_t;
@@ -59,13 +78,11 @@ extern "C" {
 #endif //__cplusplus
 
 #if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
-__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
-__device__ void __ockl_dm_dealloc(unsigned long long __addr);
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
+extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
 #if __has_feature(address_sanitizer)
-__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
-                                                 unsigned long long __pc);
-__device__ void __asan_free_impl(unsigned long long __addr,
-                                 unsigned long long __pc);
+extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
+extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
 __attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
   unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
   return (void *)__asan_malloc_impl(__size, __pc);
@@ -74,7 +91,7 @@ __attribute__((noinline, weak)) __device__ void free(void *__ptr) {
   unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
   __asan_free_impl((unsigned long long)__ptr, __pc);
 }
-#else // __has_feature(address_sanitizer)
+#else
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   return (void *) __ockl_dm_alloc(__size);
 }
@@ -92,7 +109,7 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
 __attribute__((weak)) inline __device__ void free(void *__ptr) {
   __hip_free(__ptr);
 }
-#else  // __HIP_ENABLE_DEVICE_MALLOC__
+#else
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   __builtin_trap();
   return (void *)0;
@@ -100,38 +117,13 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
 __attribute__((weak)) inline __device__ void free(void *__ptr) {
   __builtin_trap();
 }
-#endif // __HIP_ENABLE_DEVICE_MALLOC__
+#endif
 #endif // HIP version check
 
 #ifdef __cplusplus
 } // extern "C"
 #endif //__cplusplus
 
-#if !defined(__HIPCC_RTC__)
-#include <cmath>
-#include <cstdlib>
-#include <stdlib.h>
-#if __has_include("hip/hip_version.h")
-#include "hip/hip_version.h"
-#endif // __has_include("hip/hip_version.h")
-#else
-typedef __SIZE_TYPE__ size_t;
-// Define macros which are needed to declare HIP device API's without standard
-// C/C++ headers. This is for readability so that these API's can be written
-// the same way as non-hipRTC use case. These macros need to be popped so that
-// they do not pollute users' name space.
-#pragma push_macro("NULL")
-#pragma push_macro("uint32_t")
-#pragma push_macro("uint64_t")
-#pragma push_macro("CHAR_BIT")
-#pragma push_macro("INT_MAX")
-#define NULL (void *)0
-#define uint32_t __UINT32_TYPE__
-#define uint64_t __UINT64_TYPE__
-#define CHAR_BIT __CHAR_BIT__
-#define INT_MAX __INTMAX_MAX__
-#endif // __HIPCC_RTC__
-
 #include <__clang_hip_libdevice_declares.h>
 #include <__clang_hip_math.h>
 #include <__clang_hip_stdlib.h>

diff  --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib
index aac4e68662da6..0b0adf4387309 100644
--- a/clang/test/Headers/Inputs/include/cstdlib
+++ b/clang/test/Headers/Inputs/include/cstdlib
@@ -26,7 +26,5 @@ float fabs(float __x) { return __builtin_fabs(__x); }
 float abs(float __x) { return fabs(__x); }
 double abs(double __x) { return fabs(__x); }
 
-using ::malloc;
-using ::free;
 }
 

diff  --git a/clang/test/Headers/Inputs/include/math.h b/clang/test/Headers/Inputs/include/math.h
index cbd6bf7457a76..b13b14f2b1244 100644
--- a/clang/test/Headers/Inputs/include/math.h
+++ b/clang/test/Headers/Inputs/include/math.h
@@ -105,6 +105,8 @@ long lrint(double __a);
 long lrintf(float __a);
 long lround(double __a);
 long lroundf(float __a);
+int max(int __a, int __b);
+int min(int __a, int __b);
 double modf(double __a, double *__b);
 float modff(float __a, float *__b);
 double nearbyint(double __a);

diff  --git a/clang/test/Headers/Inputs/include/sstream b/clang/test/Headers/Inputs/include/sstream
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Headers/Inputs/include/stdexcept b/clang/test/Headers/Inputs/include/stdexcept
deleted file mode 100644
index e69de29bb2d1d..0000000000000

diff  --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip
index 8264b4e2c8e5d..3ee03af5f9f8f 100644
--- a/clang/test/Headers/hip-header.hip
+++ b/clang/test/Headers/hip-header.hip
@@ -31,14 +31,7 @@
 // RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
 // RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
 // RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
-// RUN:   -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
-// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
-// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
-// RUN:   -internal-isystem %S/Inputs/include \
-// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
-// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
-// RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
-// RUN:   -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
+// RUN:   -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
 // RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
 // RUN:   -internal-isystem %S/Inputs/include \
@@ -47,13 +40,6 @@
 // RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
 // RUN:   -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
 // RUN:   | FileCheck -check-prefixes=MALLOC-ASAN %s
-// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
-// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
-// RUN:   -internal-isystem %S/Inputs/include \
-// RUN:   -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
-// RUN:   -emit-llvm %s -o - \
-// RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
-// RUN:   -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s
 
 // expected-no-diagnostics
 
@@ -147,10 +133,9 @@ __device__ double test_isnan() {
 
 // Check that device malloc and free do not conflict with std headers.
 #include <cstdlib>
-// MALLOC-LABEL: define{{.*}}@_Z11test_malloc
-// MALLOC: call {{.*}}ptr @malloc(i64
-// MALLOC: call {{.*}}ptr @malloc(i64
-// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
+// CHECK-LABEL: define{{.*}}@_Z11test_malloc
+// CHECK: call {{.*}}ptr @malloc(i64
+// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
 // MALLOC:  call i64 @__ockl_dm_alloc
 // NOMALLOC:  call void @llvm.trap
 // MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
@@ -158,13 +143,11 @@ __device__ double test_isnan() {
 // MALLOC-ASAN:  call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
 __device__ void test_malloc(void *a) {
   a = malloc(42);
-  a = std::malloc(42);
 }
 
-// MALLOC-LABEL: define{{.*}}@_Z9test_free
-// MALLOC: call {{.*}}void @free(ptr
-// MALLOC: call {{.*}}void @free(ptr
-// MALLOC-LABEL: define weak {{.*}}void @free(ptr
+// CHECK-LABEL: define{{.*}}@_Z9test_free
+// CHECK: call {{.*}}void @free(ptr
+// CHECK-LABEL: define weak {{.*}}void @free(ptr
 // MALLOC:  call void @__ockl_dm_dealloc
 // NOMALLOC: call void @llvm.trap
 // MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
@@ -172,17 +155,4 @@ __device__ void test_malloc(void *a) {
 // MALLOC-ASAN:  call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
 __device__ void test_free(void *a) {
   free(a);
-  std::free(a);
-}
-
-// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
-// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
-// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
-// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
-// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
-void test_malloc_host(void *a) {
-  a = malloc(42);
-  free(a);
-  a = std::malloc(42);
-  std::free(a);
 }


        


More information about the cfe-commits mailing list