[clang] 07e8582 - [OpenMP][AMDGCN] Enable complex functions

Tue Aug 24 00:10:52 PDT 2021

Author: Pushpinder Singh
Date: 2021-08-24T12:40:41+05:30
New Revision: 07e85823aa75293888bba98868165f5acc3b2554

URL: https://github.com/llvm/llvm-project/commit/07e85823aa75293888bba98868165f5acc3b2554
DIFF: https://github.com/llvm/llvm-project/commit/07e85823aa75293888bba98868165f5acc3b2554.diff

LOG: [OpenMP][AMDGCN] Enable complex functions

This patch enables basic complex functionality using the ocml builtins.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D108552

Added: 
    clang/test/Headers/amdgcn-openmp-device-math-complex.c

Modified: 
    clang/lib/Headers/__clang_cuda_complex_builtins.h
    clang/lib/Headers/openmp_wrappers/complex
    clang/lib/Headers/openmp_wrappers/complex.h

Removed: 
    


################################################################################
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 2b701fef0ea2a..7bc7bc2ce63e1 100644

--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -16,7 +16,7 @@
 // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
 
 #pragma push_macro("__DEVICE__")
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
 #pragma omp declare target
 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
 #else
@@ -26,7 +26,7 @@
 // To make the algorithms available for C and C++ in CUDA and OpenMP we select
 // different but equivalent function versions. TODO: For OpenMP we currently
 // select the native builtins as the overload support for templates is lacking.
-#if !defined(__OPENMP_NVPTX__)
+#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
 #define _ISNANd std::isnan
 #define _ISNANf std::isnan
 #define _ISINFd std::isinf
@@ -276,7 +276,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
 #undef _fmaxd
 #undef _fmaxf
 
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
 #pragma omp end declare target
 #endif
 

diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
index dfd6193c97cbd..d6e740df42fb3 100644
--- a/clang/lib/Headers/openmp_wrappers/complex
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -17,9 +17,18 @@
 // We require std::math functions in the complex builtins below.
 #include <cmath>
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
+#endif // __NVPTX__
+
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif // __AMDGCN__
+
 #endif
 
 // Grab the host header too.
@@ -43,4 +52,4 @@
 
 #pragma omp end declare variant
 
-#endif
+#endif // _LIBCPP_STD_VER

diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h
index 15dc415b8126d..7e7c0866426bc 100644
--- a/clang/lib/Headers/openmp_wrappers/complex.h
+++ b/clang/lib/Headers/openmp_wrappers/complex.h
@@ -17,10 +17,19 @@
 // We require math functions in the complex builtins below.
 #include <math.h>
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
 #endif
 
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif
+
+#endif
+
 // Grab the host header too.
 #include_next <complex.h>

diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
new file mode 100644
index 0000000000000..74d4b2485fabc
--- /dev/null
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK
+
+#include <complex.h>
+
+void test_complex_f64(double _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f64
+#pragma omp target
+  {
+    // CHECK: call { double, double } @__divdc3
+    // CHECK: call { double, double } @__muldc3
+    (void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call double @__ocml_fabs_f64(
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isfinite_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call double @__ocml_scalbn_f64(
+// CHECK-DAG: call double @__ocml_logb_f64(
+
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+
+void test_complex_f32(float _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f32
+#pragma omp target
+  {
+    // CHECK: call [2 x i32] @__divsc3
+    // CHECK: call [2 x i32] @__mulsc3
+    (void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call float @__ocml_fabs_f32(
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isfinite_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call float @__ocml_scalbn_f32(
+// CHECK-DAG: call float @__ocml_logb_f32(
+
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(