[Openmp-commits] [openmp] 3ada8d2 - [libomptarget] Build a minimal deviceRTL for amdgcn

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Wed Dec 4 08:43:49 PST 2019


Author: JonChesterfield
Date: 2019-12-04T16:43:37Z
New Revision: 3ada8d2a87a2e818ea5302f40dbb0319d95b1554

URL: https://github.com/llvm/llvm-project/commit/3ada8d2a87a2e818ea5302f40dbb0319d95b1554
DIFF: https://github.com/llvm/llvm-project/commit/3ada8d2a87a2e818ea5302f40dbb0319d95b1554.diff

LOG: [libomptarget] Build a minimal deviceRTL for amdgcn

Summary:
[libomptarget] Build a minimal deviceRTL for amdgcn

Repeat of D70414, with an include path fixed. Diff for sanity checking.

The CMakeLists.txt file is functionally identical to the one used in the aomp fork.
Whitespace changes were made based on nvptx/CMakeLists.txt, plus the
copyright notice updated to match (Greg was the original author so would
like his sign off on that here).

This change will build a small subset of the deviceRTL if an appropriate toolchain is
available, e.g. a local install of rocm. Support.h is moved from nvptx as a dependency
of debug.h.

Reviewers: ABataev, jdoerfert

Reviewed By: ABataev

Subscribers: jvesely, mgorny, jfb, openmp-commits, jdoerfert

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D70971

Added: 
    openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/common/device_environment.h
    openmp/libomptarget/deviceRTLs/common/support.h

Modified: 
    openmp/libomptarget/deviceRTLs/CMakeLists.txt
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/common/debug.h
    openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/libomptarget/deviceRTLs/nvptx/src/support.cu

Removed: 
    openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h
    openmp/libomptarget/deviceRTLs/nvptx/src/support.h


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
index 9723fb8cde34..8bbf987aaf20 100644
--- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
@@ -6,8 +6,9 @@
 #
 # ##===----------------------------------------------------------------------===##
 #
-# Build a device RTL for each available machine available.
+# Build a device RTL for each available machine.
 #
 ##===----------------------------------------------------------------------===##
 
+add_subdirectory(amdgcn)
 add_subdirectory(nvptx)

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
new file mode 100644
index 000000000000..6b82b4eccdc8
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -0,0 +1,136 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build the AMDGCN Device RTL if the ROCM tools are available
+#
+##===----------------------------------------------------------------------===##
+
+find_package(LLVM QUIET CONFIG
+  PATHS
+  $ENV{AOMP}
+  $ENV{HOME}/rocm/aomp
+  /opt/rocm/aomp
+  /usr/lib/rocm/aomp
+  ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
+  ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
+  ${CMAKE_CXX_COMPILER_DIR}
+  NO_DEFAULT_PATH)
+
+if (LLVM_DIR)
+  libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
+else()
+  libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
+  return()
+endif()
+
+set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})
+
+if (AOMP_INSTALL_PREFIX)
+  set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
+else()
+  set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
+endif()
+
+libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")
+
+project(omptarget-amdgcn)
+
+add_custom_target(omptarget-amdgcn ALL)
+
+#optimization level
+set(optimization_level 2)
+
+# Activate RTL message dumps if requested by the user.
+if(LIBOMPTARGET_NVPTX_DEBUG)
+  set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
+endif()
+
+get_filename_component(devicertl_base_directory
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  DIRECTORY)
+
+set(cuda_sources
+  ${devicertl_base_directory}/common/src/cancel.cu
+  ${devicertl_base_directory}/common/src/critical.cu)
+
+set(h_files
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
+  ${devicertl_base_directory}/common/debug.h
+  ${devicertl_base_directory}/common/device_environment.h
+  ${devicertl_base_directory}/common/state-queue.h
+  ${devicertl_base_directory}/common/state-queuei.h
+  ${devicertl_base_directory}/common/support.h)
+
+# for both in-tree and out-of-tree build
+if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
+  set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
+else()
+  set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
+endif()
+
+# create libraries
+set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
+if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
+  set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+endif()
+
+macro(add_cuda_bc_library)
+  set(cu_cmd ${AOMP_BINDIR}/clang++
+    -std=c++11
+    -fcuda-rdc
+    -fvisibility=default
+    --cuda-device-only
+    -Wno-unused-value
+    -x hip
+    -O${optimization_level}
+    --cuda-gpu-arch=${mcpu}
+    ${CUDA_DEBUG}
+    -I${CMAKE_CURRENT_SOURCE_DIR}/src
+    -I${devicertl_base_directory})
+
+  set(bc1_files)
+
+  foreach(file ${ARGN})
+    get_filename_component(fname ${file} NAME_WE)
+    set(bc1_filename ${fname}.${mcpu}.bc)
+
+    add_custom_command(
+      OUTPUT ${bc1_filename}
+      COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
+      DEPENDS ${file} ${h_files})
+
+    list(APPEND bc1_files ${bc1_filename})
+  endforeach()
+
+  add_custom_command(
+    OUTPUT linkout.cuda.${mcpu}.bc
+    COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
+    DEPENDS ${bc1_files})
+
+  list(APPEND bc_files linkout.cuda.${mcpu}.bc)
+endmacro()
+
+set(libname "omptarget-amdgcn")
+
+foreach(mcpu ${mcpus})
+  set(bc_files)
+  add_cuda_bc_library(${cuda_sources})
+
+  set(bc_libname lib${libname}-${mcpu}.bc)
+  add_custom_command(
+    OUTPUT ${bc_libname}
+    COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
+    DEPENDS ${bc_files})
+
+  add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})
+
+  install(FILES ${OUTPUTDIR}/${bc_libname}
+     DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
+  )
+endforeach()

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b2d06ab4b73a..c6e082c2b961 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -72,8 +72,6 @@ EXTERN uint64_t __lanemask_lt();
 // thread's lane number in the warp
 EXTERN uint64_t __lanemask_gt();
 
-EXTERN void llvm_amdgcn_s_barrier();
-
 // CU id
 EXTERN unsigned __smid();
 
@@ -101,25 +99,21 @@ INLINE uint32_t __kmpc_impl_smid() {
   return __smid();
 }
 
-INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }
+INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
 
-INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }
+INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
 
 INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
   return __ballot64(1);
 }
 
-INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
-                                     int32_t SrcLane) {
-  return __shfl(Var, SrcLane, WARPSIZE);
-}
+EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
+                                     int32_t SrcLane);
 
-INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
-                                          uint32_t Delta, int32_t Width) {
-  return __shfl_down(Var, Delta, Width);
-}
+EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
+                                          uint32_t Delta, int32_t Width);
 
-INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }
+INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
 
 INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
   // we have protected the master warp from releasing from its barrier
@@ -128,4 +122,15 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
   __builtin_amdgcn_s_barrier();
 }
 
+// DEVICE versions of part of libc
+extern "C" {
+DEVICE __attribute__((noreturn)) void
+__assertfail(const char *, const char *, unsigned, const char *, size_t);
+INLINE static void __assert_fail(const char *__message, const char *__file,
+                                 unsigned int __line, const char *__function) {
+  __assertfail(__message, __file, __line, __function, sizeof(char));
+}
+DEVICE int printf(const char *, ...);
+}
+
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 3388b04616f4..8bb4e3a6dd0c 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -28,7 +28,7 @@
 #ifndef _OMPTARGET_NVPTX_DEBUG_H_
 #define _OMPTARGET_NVPTX_DEBUG_H_
 
-#include "device_environment.h"
+#include "common/device_environment.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // set desired level of debugging
@@ -128,7 +128,7 @@
 
 #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
 #include <stdio.h>
-#include "support.h"
+#include "common/support.h"
 
 template <typename... Arguments>
 NOINLINE static void log(const char *fmt, Arguments... parameters) {

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h b/openmp/libomptarget/deviceRTLs/common/device_environment.h
similarity index 89%
rename from openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h
rename to openmp/libomptarget/deviceRTLs/common/device_environment.h
index b2f65af354a6..68a7757d2047 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h
+++ b/openmp/libomptarget/deviceRTLs/common/device_environment.h
@@ -19,6 +19,6 @@ struct omptarget_device_environmentTy {
   int32_t debug_level;
 };
 
-extern __device__ omptarget_device_environmentTy omptarget_device_environment;
+extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
 
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
similarity index 100%
rename from openmp/libomptarget/deviceRTLs/nvptx/src/support.h
rename to openmp/libomptarget/deviceRTLs/common/support.h

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 181bceb3e175..46ed8f4ef343 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "omptarget-nvptx.h"
-#include "device_environment.h"
+#include "common/device_environment.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // global device environment

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index a446e005c32f..51e88adee6d0 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -24,7 +24,7 @@
 #include "common/debug.h"     // debug
 #include "interface.h" // interfaces with omp, compiler, and user
 #include "common/state-queue.h"
-#include "support.h"
+#include "common/support.h"
 
 #define OMPTARGET_NVPTX_VERSION 1.1
 

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
index c8ac493459c1..b5efa632b004 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "support.h"
+#include "common/support.h"
 #include "common/debug.h"
 #include "omptarget-nvptx.h"
 


        


More information about the Openmp-commits mailing list