[Openmp-commits] [openmp] 51bda3a - [Libomptarget] Replace std::vector with llvm::SmallVector

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Mon Aug 1 12:59:22 PDT 2022


Author: Joseph Huber
Date: 2022-08-01T15:59:15-04:00
New Revision: 51bda3a0e7ac0285cf116b5cfd1393a3340974b5

URL: https://github.com/llvm/llvm-project/commit/51bda3a0e7ac0285cf116b5cfd1393a3340974b5
DIFF: https://github.com/llvm/llvm-project/commit/51bda3a0e7ac0285cf116b5cfd1393a3340974b5.diff

LOG: [Libomptarget] Replace std::vector with llvm::SmallVector

The runtime makes some use of `std::vector` data structures. We should
be able to replace these trivially with `llvm::SmallVector` instead.
This should allow us to avoid heap allocations in the majority of cases
now.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D130927

Added: 
    

Modified: 
    openmp/libomptarget/include/device.h
    openmp/libomptarget/include/rtl.h
    openmp/libomptarget/src/LegacyAPI.cpp
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h

Removed: 
    


################################################################################
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index e81934f083705..7f854ac5b4f9f 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -22,11 +22,11 @@
 #include <mutex>
 #include <set>
 #include <thread>
-#include <vector>
 
 #include "ExclusiveAccess.h"
 #include "omptarget.h"
 #include "rtl.h"
+#include "llvm/ADT/SmallVector.h"
 
 // Forward declarations.
 struct RTLInfoTy;
@@ -484,14 +484,14 @@ struct PluginManager {
   std::list<std::pair<__tgt_device_image, __tgt_image_info>> Images;
 
   /// Devices associated with RTLs
-  std::vector<std::unique_ptr<DeviceTy>> Devices;
+  llvm::SmallVector<std::unique_ptr<DeviceTy>> Devices;
   std::mutex RTLsMtx; ///< For RTLs and Devices
 
   /// Translation table retreived from the binary
   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
   std::mutex TrlTblMtx; ///< For Translation Table
   /// Host offload entries in order of image registration
-  std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
+  llvm::SmallVector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
 
   /// Map from ptrs on the host to an entry in the Translation Table
   HostPtrToTableMapTy HostPtrToTableMap;

diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
index 5e755393330c7..8d06344f091c1 100644
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -14,11 +14,12 @@
 #define _OMPTARGET_RTL_H
 
 #include "omptarget.h"
+#include "llvm/ADT/SmallVector.h"
+
 #include <list>
 #include <map>
 #include <mutex>
 #include <string>
-#include <vector>
 
 // Forward declarations.
 struct DeviceTy;
@@ -137,7 +138,7 @@ struct RTLsTy {
 
   // Array of pointers to the detected runtime libraries that have compatible
   // binaries.
-  std::vector<RTLInfoTy *> UsedRTLs;
+  llvm::SmallVector<RTLInfoTy *> UsedRTLs;
 
   int64_t RequiresFlags = OMP_REQ_UNDEFINED;
 
@@ -172,10 +173,12 @@ struct TranslationTable {
   __tgt_target_table HostTable;
 
   // Image assigned to a given device.
-  std::vector<__tgt_device_image *> TargetsImages; // One image per device ID.
+  llvm::SmallVector<__tgt_device_image *>
+      TargetsImages; // One image per device ID.
 
   // Table of entry points or NULL if it was not already computed.
-  std::vector<__tgt_target_table *> TargetsTable; // One table per device ID.
+  llvm::SmallVector<__tgt_target_table *>
+      TargetsTable; // One table per device ID.
 };
 typedef std::map<__tgt_offload_entry *, TranslationTable>
     HostEntriesBeginToTransTableTy;

diff --git a/openmp/libomptarget/src/LegacyAPI.cpp b/openmp/libomptarget/src/LegacyAPI.cpp
index 5edbc919f8b21..d963be9d23059 100644
--- a/openmp/libomptarget/src/LegacyAPI.cpp
+++ b/openmp/libomptarget/src/LegacyAPI.cpp
@@ -173,8 +173,8 @@ EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
      LoopTripcount);
   PM->TblMapMtx.lock();
-  PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
-                                             LoopTripcount);
+  PM->Devices[DeviceId]->LoopTripCnt[__kmpc_global_thread_num(NULL)] =
+      LoopTripcount;
   PM->TblMapMtx.unlock();
 }
 

diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index c33a3edc39891..4615331948d1a 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -20,6 +20,8 @@
 #include <cstdint>
 #include <vector>
 
+using llvm::SmallVector;
+
 int AsyncInfoTy::synchronize() {
   int Result = OFFLOAD_SUCCESS;
   if (AsyncInfo.Queue) {
@@ -384,11 +386,11 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
   // Construct new arrays for args_base, args, arg_sizes and arg_types
   // using the information in MapperComponents and call the corresponding
   // targetData* function using these new arrays.
-  std::vector<void *> MapperArgsBase(MapperComponents.Components.size());
-  std::vector<void *> MapperArgs(MapperComponents.Components.size());
-  std::vector<int64_t> MapperArgSizes(MapperComponents.Components.size());
-  std::vector<int64_t> MapperArgTypes(MapperComponents.Components.size());
-  std::vector<void *> MapperArgNames(MapperComponents.Components.size());
+  SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
+  SmallVector<void *> MapperArgs(MapperComponents.Components.size());
+  SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
+  SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
+  SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
 
   for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
     auto &C = MapperComponents.Components[I];
@@ -679,7 +681,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                   int64_t *ArgTypes, map_var_info_t *ArgNames,
                   void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
   int Ret;
-  std::vector<PostProcessingInfo> PostProcessingPtrs;
+  SmallVector<PostProcessingInfo> PostProcessingPtrs;
   void *FromMapperBase = nullptr;
   // process each input.
   for (int32_t I = ArgNum - 1; I >= 0; --I) {
@@ -883,7 +885,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
       // If the struct is to be deallocated, remove the shadow entry.
       if (Info.DelEntry) {
         DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
-        Itr = Device.ShadowPtrMap.erase(Itr);
+        auto OldItr = Itr;
+        Itr++;
+        Device.ShadowPtrMap.erase(OldItr);
       } else {
         ++Itr;
       }
@@ -1171,12 +1175,12 @@ class PrivateArgumentManagerTy {
   };
 
   /// A vector of target pointers for all private arguments
-  std::vector<void *> TgtPtrs;
+  SmallVector<void *> TgtPtrs;
 
   /// A vector of information of all first-private arguments to be packed
-  std::vector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
+  SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
   /// Host buffer for all arguments to be packed
-  std::vector<char> FirstPrivateArgBuffer;
+  SmallVector<char> FirstPrivateArgBuffer;
   /// The total size of all arguments to be packed
   int64_t FirstPrivateArgSize = 0;
 
@@ -1255,7 +1259,7 @@ class PrivateArgumentManagerTy {
 
   /// Pack first-private arguments, replace place holder pointers in \p TgtArgs,
   /// and start the transfer.
-  int packAndTransfer(std::vector<void *> &TgtArgs) {
+  int packAndTransfer(SmallVector<void *> &TgtArgs) {
     if (!FirstPrivateArgInfo.empty()) {
       assert(FirstPrivateArgSize != 0 &&
              "FirstPrivateArgSize is 0 but FirstPrivateArgInfo is empty");
@@ -1323,8 +1327,8 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
                              int32_t ArgNum, void **ArgBases, void **Args,
                              int64_t *ArgSizes, int64_t *ArgTypes,
                              map_var_info_t *ArgNames, void **ArgMappers,
-                             std::vector<void *> &TgtArgs,
-                             std::vector<ptrdiff_t> &TgtOffsets,
+                             SmallVector<void *> &TgtArgs,
+                             SmallVector<ptrdiff_t> &TgtOffsets,
                              PrivateArgumentManagerTy &PrivateArgumentManager,
                              AsyncInfoTy &AsyncInfo) {
   TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
@@ -1337,7 +1341,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
   }
 
   // List of (first-)private arrays allocated for this target region
-  std::vector<int> TgtArgsPositions(ArgNum, -1);
+  SmallVector<int> TgtArgsPositions(ArgNum, -1);
 
   for (int32_t I = 0; I < ArgNum; ++I) {
     if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
@@ -1521,8 +1525,8 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
   // begin addresses, not bases. That's why we pass args and offsets as two
   // separate entities so that each plugin can do what it needs. This behavior
   // was introdued via https://reviews.llvm.org/D33028 and commit 1546d319244c.
-  std::vector<void *> TgtArgs;
-  std::vector<ptrdiff_t> TgtOffsets;
+  SmallVector<void *> TgtArgs;
+  SmallVector<ptrdiff_t> TgtOffsets;
 
   PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
 
@@ -1547,11 +1551,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
     TIMESCOPE_WITH_NAME_AND_IDENT(
         IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc);
     if (IsTeamConstruct)
-      Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+      Ret = Device.runTeamRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
                                  TgtArgs.size(), TeamNum, ThreadLimit,
                                  Tripcount, AsyncInfo);
     else
-      Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+      Ret = Device.runRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
                              TgtArgs.size(), AsyncInfo);
   }
 

diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 6be4a7f0e0909..2fe7c6337a213 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -67,7 +67,7 @@ struct MapComponentInfoTy {
 // components are dynamically decided, so we utilize C++ STL vector
 // implementation here.
 struct MapperComponentsTy {
-  std::vector<MapComponentInfoTy> Components;
+  llvm::SmallVector<MapComponentInfoTy> Components;
   int32_t size() { return Components.size(); }
 };
 


        


More information about the Openmp-commits mailing list