[clang] dbd7bad - [openmp] Annotate tmp variables with omp_thread_mem_alloc

Jon Chesterfield via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 18 18:22:25 PDT 2021


Author: Jon Chesterfield
Date: 2021-08-19T02:22:11+01:00
New Revision: dbd7bad9ad9bc32538e324417c23387bf4ac7747

URL: https://github.com/llvm/llvm-project/commit/dbd7bad9ad9bc32538e324417c23387bf4ac7747
DIFF: https://github.com/llvm/llvm-project/commit/dbd7bad9ad9bc32538e324417c23387bf4ac7747.diff

LOG: [openmp] Annotate tmp variables with omp_thread_mem_alloc

Fixes miscompile of calls into ocml. Bug 51445.

The stack variable `double __tmp` is moved to dynamically allocated shared
memory by CGOpenMPRuntimeGPU. This is usually fine, but when the variable
is passed to a function that is explicitly annotated address_space(5) then
allocating the variable off-stack leads to a miscompile in the back end,
which cannot decide to move the variable back to the stack from shared.

This could be fixed by removing the AS(5) annotation from the math library
or by explicitly marking the variables as thread_mem_alloc. The cast to
AS(5) is still a no-op once IR is reached.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D107971

Added: 
    clang/test/Headers/Inputs/include/omp.h

Modified: 
    clang/lib/Headers/__clang_hip_math.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 9effaa18d3e8c..ef7e087b832ca 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
+#ifdef __OPENMP_AMDGCN__
+#include <omp.h>
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
       __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
       __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@ float remainderf(float __x, float __y) {
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
       __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
       __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
       __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@ double remainder(double __x, double __y) {
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
       __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
       __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;

diff  --git a/clang/test/Headers/Inputs/include/omp.h b/clang/test/Headers/Inputs/include/omp.h
new file mode 100644
index 0000000000000..9a6c115394793
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/omp.h
@@ -0,0 +1,21 @@
+#ifndef __OMP_H
+#define __OMP_H
+
+#if _OPENMP
+// Follows the pattern in interface.h
+// Clang sema checks this type carefully, needs to closely match that from omp.h
+typedef enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
+} omp_allocator_handle_t;
+#endif
+
+#endif


        


More information about the cfe-commits mailing list