[Openmp-commits] [openmp] ed7ec86 - [OpenMP] Improve alignment handling in the new device runtime

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Tue Jul 27 15:50:40 PDT 2021


Author: Johannes Doerfert
Date: 2021-07-27T17:50:27-05:00
New Revision: ed7ec860f03caf6b702f27a74c3682d061f60e1d

URL: https://github.com/llvm/llvm-project/commit/ed7ec860f03caf6b702f27a74c3682d061f60e1d
DIFF: https://github.com/llvm/llvm-project/commit/ed7ec860f03caf6b702f27a74c3682d061f60e1d.diff

LOG: [OpenMP] Improve alignment handling in the new device runtime

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/include/Utils.h
    openmp/libomptarget/DeviceRTL/src/State.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h
index 912c40781612..dc4b1cd71a59 100644
--- a/openmp/libomptarget/DeviceRTL/include/Utils.h
+++ b/openmp/libomptarget/DeviceRTL/include/Utils.h
@@ -63,6 +63,15 @@ inline uint32_t popc(uint64_t V) {
   return __builtin_popcountl(V);
 }
 
+/// Return \p V aligned "upwards" to a multiple of \p Align (\p Align cast to Ty1).
+template <typename Ty1, typename Ty2> inline Ty1 align_up(Ty1 V, Ty2 Align) {
+  return ((V + Ty1(Align) - 1) / Ty1(Align)) * Ty1(Align);
+}
+/// Return \p V aligned "downwards" to a multiple of \p Align (drops V % Align).
+template <typename Ty1, typename Ty2> inline Ty1 align_down(Ty1 V, Ty2 Align) {
+  return V - V % Align;
+}
+
 #define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
 #define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
 

diff  --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index dae262a04082..2e00a6ecb02f 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -85,8 +85,8 @@ struct SharedMemorySmartStackTy {
   /// Compute the size of the storage space reserved for a thread.
   uint32_t computeThreadStorageTotal() {
     uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements();
-    return (state::SharedScratchpadSize - NumLanesInBlock + 1) /
-           NumLanesInBlock;
+    return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock),
+                             Alignment);
   }
 
   /// Return the top address of the warp data stack, that is the first address
@@ -114,7 +114,7 @@ void SharedMemorySmartStackTy::init(bool IsSPMD) {
 
 void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
   // First align the number of requested bytes.
-  uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
+  uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
 
   uint32_t StorageTotal = computeThreadStorageTotal();
 
@@ -136,7 +136,7 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
 }
 
 void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
-  uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
+  uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
   if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
     int TId = mapping::getThreadIdInBlock();
     Usage[TId] -= AlignedBytes;


        


More information about the Openmp-commits mailing list