[llvm] [Offload][NFC] Reorganize `utils::` and make Device/Host/Shared clearer (PR #100280)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 16:39:43 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Johannes Doerfert (jdoerfert)
<details>
<summary>Changes</summary>
We had three `utils::` namespaces, all with different "meaning" (host, device, hsa_utils). We should, when we can, keep "include/Shared" accessible from host and device, thus RefCountTy has been moved to a separate header. `hsa_utils` was introduced to make `utils::` less overloaded. And common functionality was de-duplicated, e.g., `utils::advance` and `utils::advanceVoidPtr` -> `utils:advancePtr`. Type punning now checks for the size of the result to make sure it matches the source type.
No functional change was intended.
---
Patch is 51.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100280.diff
36 Files Affected:
- (modified) offload/DeviceRTL/CMakeLists.txt (+3-3)
- (modified) offload/DeviceRTL/include/Allocator.h (+1-1)
- (modified) offload/DeviceRTL/include/Configuration.h (+1-1)
- (renamed) offload/DeviceRTL/include/DeviceTypes.h (+1-1)
- (added) offload/DeviceRTL/include/DeviceUtils.h (+54)
- (modified) offload/DeviceRTL/include/Interface.h (+1-1)
- (modified) offload/DeviceRTL/include/LibC.h (+1-1)
- (modified) offload/DeviceRTL/include/Mapping.h (+1-1)
- (modified) offload/DeviceRTL/include/State.h (+2-2)
- (modified) offload/DeviceRTL/include/Synchronization.h (+1-1)
- (removed) offload/DeviceRTL/include/Utils.h (-100)
- (modified) offload/DeviceRTL/src/Allocator.cpp (+2-2)
- (modified) offload/DeviceRTL/src/Configuration.cpp (+1-1)
- (modified) offload/DeviceRTL/src/Debug.cpp (+1-1)
- (renamed) offload/DeviceRTL/src/DeviceUtils.cpp (+8-8)
- (modified) offload/DeviceRTL/src/Kernel.cpp (+1-1)
- (modified) offload/DeviceRTL/src/Mapping.cpp (+2-2)
- (modified) offload/DeviceRTL/src/Misc.cpp (+1-1)
- (modified) offload/DeviceRTL/src/Parallelism.cpp (+2-2)
- (modified) offload/DeviceRTL/src/Reduction.cpp (+2-2)
- (modified) offload/DeviceRTL/src/State.cpp (+8-8)
- (modified) offload/DeviceRTL/src/Synchronization.cpp (+2-2)
- (modified) offload/DeviceRTL/src/Tasking.cpp (+3-3)
- (modified) offload/DeviceRTL/src/Workshare.cpp (+3-3)
- (added) offload/include/Shared/RefCnt.h (+56)
- (added) offload/include/Shared/Types.h (+22)
- (modified) offload/include/Shared/Utils.h (+45-55)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+52-48)
- (modified) offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h (+2-2)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+8-8)
- (modified) offload/plugins-nextgen/common/src/GlobalHandler.cpp (+2-2)
- (modified) offload/plugins-nextgen/common/src/JIT.cpp (+2-2)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+14-12)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+1-1)
- (modified) offload/src/DeviceImage.cpp (+2-3)
- (modified) offload/src/omptarget.cpp (+2-2)
``````````diff
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 7818c8d752599..5bd54c32a37bd 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -79,8 +79,8 @@ set(include_files
${include_directory}/Mapping.h
${include_directory}/State.h
${include_directory}/Synchronization.h
- ${include_directory}/Types.h
- ${include_directory}/Utils.h
+ ${include_directory}/DeviceTypes.h
+ ${include_directory}/DeviceUtils.h
${include_directory}/Workshare.h
)
@@ -97,7 +97,7 @@ set(src_files
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
${source_directory}/Tasking.cpp
- ${source_directory}/Utils.cpp
+ ${source_directory}/DeviceUtils.cpp
${source_directory}/Workshare.cpp
)
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index a28eb0fb2977e..6bb1cafac720f 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -12,7 +12,7 @@
#ifndef OMPTARGET_ALLOCATOR_H
#define OMPTARGET_ALLOCATOR_H
-#include "Types.h"
+#include "DeviceTypes.h"
// Forward declaration.
struct KernelEnvironmentTy;
diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h
index 8e6f5c89cbf24..f8b7a6c3c6c9d 100644
--- a/offload/DeviceRTL/include/Configuration.h
+++ b/offload/DeviceRTL/include/Configuration.h
@@ -15,7 +15,7 @@
#include "Shared/Environment.h"
-#include "Types.h"
+#include "DeviceTypes.h"
namespace ompx {
namespace config {
diff --git a/offload/DeviceRTL/include/Types.h b/offload/DeviceRTL/include/DeviceTypes.h
similarity index 98%
rename from offload/DeviceRTL/include/Types.h
rename to offload/DeviceRTL/include/DeviceTypes.h
index 2e12d9da0353b..2a594de0befc2 100644
--- a/offload/DeviceRTL/include/Types.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -1,4 +1,4 @@
-//===---------- Types.h - OpenMP types ---------------------------- C++ -*-===//
+//===---------- DeviceTypes.h - OpenMP types ---------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
new file mode 100644
index 0000000000000..378d1fb2d65e3
--- /dev/null
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -0,0 +1,54 @@
+//===--- DeviceUtils.h - OpenMP device runtime utility functions -- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_DEVICERTL_DEVICE_UTILS_H
+#define OMPTARGET_DEVICERTL_DEVICE_UTILS_H
+
+#include "Shared/Utils.h"
+#include "DeviceTypes.h"
+
+#pragma omp begin declare target device_type(nohost)
+
+namespace utils {
+
+/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
+/// is identified by \p Mask.
+int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
+
+int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
+
+int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
+
+uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+
+/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
+uint64_t pack(uint32_t LowBits, uint32_t HighBits);
+
+/// Unpack \p Val into \p LowBits and \p HighBits.
+void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
+
+/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
+bool isSharedMemPtr(void *Ptr);
+
+/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
+bool isThreadLocalMemPtr(void *Ptr);
+
+/// A pointer variable that has by design an `undef` value. Use with care.
+[[clang::loader_uninitialized]] static void *const UndefPtr;
+
+#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
+#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
+
+} // namespace utils
+
+#pragma omp end declare target
+
+#endif
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index d36d4227091ef..c4bfaaa2404b4 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -14,7 +14,7 @@
#include "Shared/Environment.h"
-#include "Types.h"
+#include "DeviceTypes.h"
/// External API
///
diff --git a/offload/DeviceRTL/include/LibC.h b/offload/DeviceRTL/include/LibC.h
index dde86af783af9..6e02b4aca462a 100644
--- a/offload/DeviceRTL/include/LibC.h
+++ b/offload/DeviceRTL/include/LibC.h
@@ -12,7 +12,7 @@
#ifndef OMPTARGET_LIBC_H
#define OMPTARGET_LIBC_H
-#include "Types.h"
+#include "DeviceTypes.h"
extern "C" {
diff --git a/offload/DeviceRTL/include/Mapping.h b/offload/DeviceRTL/include/Mapping.h
index 165904644dbb9..2fb87abe5418c 100644
--- a/offload/DeviceRTL/include/Mapping.h
+++ b/offload/DeviceRTL/include/Mapping.h
@@ -12,7 +12,7 @@
#ifndef OMPTARGET_MAPPING_H
#define OMPTARGET_MAPPING_H
-#include "Types.h"
+#include "DeviceTypes.h"
namespace ompx {
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index 1a3490394458f..37699529e726f 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -16,8 +16,8 @@
#include "Debug.h"
#include "Mapping.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
// Forward declaration.
struct KernelEnvironmentTy;
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index af9e1a673e6a2..874974cc861df 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -12,7 +12,7 @@
#ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
#define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
-#include "Types.h"
+#include "DeviceTypes.h"
namespace ompx {
diff --git a/offload/DeviceRTL/include/Utils.h b/offload/DeviceRTL/include/Utils.h
deleted file mode 100644
index 82e2397b5958b..0000000000000
--- a/offload/DeviceRTL/include/Utils.h
+++ /dev/null
@@ -1,100 +0,0 @@
-//===--------- Utils.h - OpenMP device runtime utility functions -- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_DEVICERTL_UTILS_H
-#define OMPTARGET_DEVICERTL_UTILS_H
-
-#include "Types.h"
-
-#pragma omp begin declare target device_type(nohost)
-
-namespace ompx {
-namespace utils {
-
-/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
-/// is identified by \p Mask.
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
-
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
-
-int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
-
-uint64_t ballotSync(uint64_t Mask, int32_t Pred);
-
-/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
-uint64_t pack(uint32_t LowBits, uint32_t HighBits);
-
-/// Unpack \p Val into \p LowBits and \p HighBits.
-void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
-
-/// Round up \p V to a \p Boundary.
-template <typename Ty> inline Ty roundUp(Ty V, Ty Boundary) {
- return (V + Boundary - 1) / Boundary * Boundary;
-}
-
-/// Advance \p Ptr by \p Bytes bytes.
-template <typename Ty1, typename Ty2> inline Ty1 *advance(Ty1 Ptr, Ty2 Bytes) {
- return reinterpret_cast<Ty1 *>(reinterpret_cast<char *>(Ptr) + Bytes);
-}
-
-/// Return the first bit set in \p V.
-inline uint32_t ffs(uint32_t V) {
- static_assert(sizeof(int) == sizeof(uint32_t), "type size mismatch");
- return __builtin_ffs(V);
-}
-
-/// Return the first bit set in \p V.
-inline uint32_t ffs(uint64_t V) {
- static_assert(sizeof(long) == sizeof(uint64_t), "type size mismatch");
- return __builtin_ffsl(V);
-}
-
-/// Return the number of bits set in \p V.
-inline uint32_t popc(uint32_t V) {
- static_assert(sizeof(int) == sizeof(uint32_t), "type size mismatch");
- return __builtin_popcount(V);
-}
-
-/// Return the number of bits set in \p V.
-inline uint32_t popc(uint64_t V) {
- static_assert(sizeof(long) == sizeof(uint64_t), "type size mismatch");
- return __builtin_popcountl(V);
-}
-
-/// Return \p V aligned "upwards" according to \p Align.
-template <typename Ty1, typename Ty2> inline Ty1 align_up(Ty1 V, Ty2 Align) {
- return ((V + Ty1(Align) - 1) / Ty1(Align)) * Ty1(Align);
-}
-/// Return \p V aligned "downwards" according to \p Align.
-template <typename Ty1, typename Ty2> inline Ty1 align_down(Ty1 V, Ty2 Align) {
- return V - V % Align;
-}
-
-/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
-bool isSharedMemPtr(void *Ptr);
-
-/// Return \p V typed punned as \p DstTy.
-template <typename DstTy, typename SrcTy> inline DstTy convertViaPun(SrcTy V) {
- return *((DstTy *)(&V));
-}
-
-/// A pointer variable that has by design an `undef` value. Use with care.
-[[clang::loader_uninitialized]] static void *const UndefPtr;
-
-#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
-#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
-
-} // namespace utils
-} // namespace ompx
-
-#pragma omp end declare target
-
-#endif
diff --git a/offload/DeviceRTL/src/Allocator.cpp b/offload/DeviceRTL/src/Allocator.cpp
index c9c940de62c1a..2a85a34d32f6e 100644
--- a/offload/DeviceRTL/src/Allocator.cpp
+++ b/offload/DeviceRTL/src/Allocator.cpp
@@ -14,8 +14,8 @@
#include "Configuration.h"
#include "Mapping.h"
#include "Synchronization.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
using namespace ompx;
diff --git a/offload/DeviceRTL/src/Configuration.cpp b/offload/DeviceRTL/src/Configuration.cpp
index ef0c3663536f5..4d97ad67313aa 100644
--- a/offload/DeviceRTL/src/Configuration.cpp
+++ b/offload/DeviceRTL/src/Configuration.cpp
@@ -13,7 +13,7 @@
#include "Configuration.h"
#include "State.h"
-#include "Types.h"
+#include "DeviceTypes.h"
using namespace ompx;
diff --git a/offload/DeviceRTL/src/Debug.cpp b/offload/DeviceRTL/src/Debug.cpp
index 4e16591cc6c51..af26a26c13109 100644
--- a/offload/DeviceRTL/src/Debug.cpp
+++ b/offload/DeviceRTL/src/Debug.cpp
@@ -17,7 +17,7 @@
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
-#include "Types.h"
+#include "DeviceTypes.h"
using namespace ompx;
diff --git a/offload/DeviceRTL/src/Utils.cpp b/offload/DeviceRTL/src/DeviceUtils.cpp
similarity index 90%
rename from offload/DeviceRTL/src/Utils.cpp
rename to offload/DeviceRTL/src/DeviceUtils.cpp
index 53cc803234867..c204a7be73b1f 100644
--- a/offload/DeviceRTL/src/Utils.cpp
+++ b/offload/DeviceRTL/src/DeviceUtils.cpp
@@ -9,7 +9,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Utils.h"
+#include "DeviceUtils.h"
#include "Debug.h"
#include "Interface.h"
@@ -33,7 +33,7 @@ uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits;
}
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
+int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
int32_t Width);
@@ -44,8 +44,7 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred);
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
- int Width = mapping::getWarpSize();
+int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
int Self = mapping::getThreadIdInWarp();
int Index = SrcLane + (Self & ~(Width - 1));
return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -77,8 +76,8 @@ bool isSharedMemPtr(const void *Ptr) {
device = {arch(nvptx, nvptx64)}, \
implementation = {extension(match_any)})
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
- return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
+int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
+ return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1);
}
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
@@ -104,8 +103,9 @@ void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
impl::Unpack(Val, &LowBits, &HighBits);
}
-int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
- return impl::shuffle(Mask, Var, SrcLane);
+int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
+ int32_t Width) {
+ return impl::shuffle(Mask, Var, SrcLane, Width);
}
int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index e70704f25e922..c3b4554574786 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -18,7 +18,7 @@
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
-#include "Types.h"
+#include "DeviceTypes.h"
#include "Workshare.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index c1ce878746a69..8287312c74e4e 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -12,8 +12,8 @@
#include "Mapping.h"
#include "Interface.h"
#include "State.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
#pragma omp begin declare target device_type(nohost)
diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp
index c24af9442d16e..ca8b549b28dbf 100644
--- a/offload/DeviceRTL/src/Misc.cpp
+++ b/offload/DeviceRTL/src/Misc.cpp
@@ -10,7 +10,7 @@
//===----------------------------------------------------------------------===//
#include "Configuration.h"
-#include "Types.h"
+#include "DeviceTypes.h"
#include "Debug.h"
diff --git a/offload/DeviceRTL/src/Parallelism.cpp b/offload/DeviceRTL/src/Parallelism.cpp
index 15b991f202539..e3fcfef9b22aa 100644
--- a/offload/DeviceRTL/src/Parallelism.cpp
+++ b/offload/DeviceRTL/src/Parallelism.cpp
@@ -37,8 +37,8 @@
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
using namespace ompx;
diff --git a/offload/DeviceRTL/src/Reduction.cpp b/offload/DeviceRTL/src/Reduction.cpp
index 744d1a3a231c8..f4e2e0d25bde9 100644
--- a/offload/DeviceRTL/src/Reduction.cpp
+++ b/offload/DeviceRTL/src/Reduction.cpp
@@ -15,8 +15,8 @@
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
using namespace ompx;
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index a1e4fa2449d9a..3ba4b5c65c43a 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -13,13 +13,13 @@
#include "Allocator.h"
#include "Configuration.h"
#include "Debug.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
#include "Interface.h"
#include "LibC.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
-#include "Types.h"
-#include "Utils.h"
using namespace ompx;
@@ -84,14 +84,14 @@ struct SharedMemorySmartStackTy {
/// Deallocate the last allocation made by the encountering thread and pointed
/// to by \p Ptr from the stack. Each thread can call this function.
- void pop(void *Ptr, uint32_t Bytes);
+ void pop(void *Ptr, uint64_t Bytes);
private:
/// Compute the size of the storage space reserved for a thread.
uint32_t computeThreadStorageTotal() {
uint32_t NumLanesInBlock = mapping::getNumberOfThreadsInBlock();
- return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock),
- allocator::ALIGNMENT);
+ return utils::alignDown((state::SharedScratchpadSize / NumLanesInBlock),
+ allocator::ALIGNMENT);
}
/// Return the top address of the warp data stack, that is the first address
@@ -121,7 +121,7 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
// First align the number of requested bytes.
/// FIXME: The stack shouldn't require worst-case padding. Alignment needs to
/// be passed in as an argument and the stack rewritten to support it.
- uint64_t AlignedBytes = utils::align_up(Bytes, allocator::ALIGNMENT);
+ uint64_t AlignedBytes = utils::alignPtr(Bytes, allocator::ALIGNMENT);
uint32_t StorageTotal = computeThreadStorageTotal();
@@ -148,8 +148,8 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
return GlobalMemory;
}
-void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
- uint64_t AlignedBytes = utils::align_up(Bytes, allocator::ALIGNMENT);
+void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
+ uint64_t AlignedBytes = utils::alignPtr(Bytes, allocator::ALIGNMENT);
if (utils::isSharedMemPtr(Ptr)) {
int TId = mapping::getThreadIdInBlock();
Usage[TId] -= AlignedBytes;
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
index 80ba87b300bcd..97a6b080169ad 100644
--- a/offload/DeviceRTL/src/Synchronization.cpp
+++ b/offload/DeviceRTL/src/Synchronization.cpp
@@ -16,8 +16,8 @@
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
-#include "Types.h"
-#include "Utils.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
#pragma omp begin declare target device_type(nohost)
diff --git a/offload/DeviceRTL/src/Tasking.cpp b/offload/DeviceRTL/src/Tasking.cpp
index 2dc33562e6d79..23a967c1a337e 100644
--- a/offload/DeviceRTL/src/Tasking.cpp
+++ b/offload/DeviceRTL/src/Tasking.cpp
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
#include "Interface.h"
#include "State.h"
-#include "Types.h"
-#include "Utils.h"
using namespace ompx;
@@ -34,7 +34,7 @@ TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
TaskDescriptorTy *TaskDescriptor = (TaskDescriptorTy *)memory::allocGlobal(
TaskSizeTotal, "explicit task descriptor");
TaskDescriptor->Payload =
- utils::advance(TaskDescriptor, TaskSizeInclPrivateValuesPadded);
+ utils::advancePtr(TaskDescriptor, TaskSizeInclPrivateValuesPadded);
TaskDescriptor->TaskFn = TaskFn;
return TaskDescriptor;
diff --git a/offload/DeviceRTL/src/Workshare.cpp b/offload/DeviceRTL/src/Workshare.cpp
index 7e087a07e4420..ad60e66548be9 100644
--- a/offload/DeviceRTL/src/Workshare.cpp
+++ b/offload/DeviceRTL/src/Workshare.cpp
@@ -14,12 +14,12 @@
#include "Workshare.h"
#include "Debug.h"
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
-#include "Types.h"
-#include "Utils.h"
using namespace ompx;
@@ -349,7 +349,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
if (rank == 0) {
warp_res = atomic::add(&Cnt, change, atomic::seq_cst);
}
- warp_res = utils::shuffle(active, warp_res, leader);
+ warp_res = utils::shuffle(active, warp_res, leader, mapping::getWarpSize());
return warp_res + rank;
}
diff --git a/offload/include/Shared/RefCnt.h b/offload/include/Shared/RefCnt.h
new file mode 100644
index 0000000000000..7c615ba167a3d
--- /dev/null
+++ b/offload/include/Shared/RefCnt.h
@@ -0,0 +1,56 @@
+//===-- Shared/RefCnt.h - Helper to keep track of references --- C++ ------===//
+//
+// Part of the LLVM Project, u...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/100280
More information about the llvm-commits
mailing list