[Openmp-commits] [openmp] 51bda3a - [Libomptarget] Replace std::vector with llvm::SmallVector
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Mon Aug 1 12:59:22 PDT 2022
Author: Joseph Huber
Date: 2022-08-01T15:59:15-04:00
New Revision: 51bda3a0e7ac0285cf116b5cfd1393a3340974b5
URL: https://github.com/llvm/llvm-project/commit/51bda3a0e7ac0285cf116b5cfd1393a3340974b5
DIFF: https://github.com/llvm/llvm-project/commit/51bda3a0e7ac0285cf116b5cfd1393a3340974b5.diff
LOG: [Libomptarget] Replace std::vector with llvm::SmallVector
The runtime makes some use of `std::vector` data structures. We should
be able to replace these trivially with `llvm::SmallVector` instead.
This should allow us to avoid heap allocations in the majority of cases
now.
Reviewed By: tianshilei1992
Differential Revision: https://reviews.llvm.org/D130927
Added:
Modified:
openmp/libomptarget/include/device.h
openmp/libomptarget/include/rtl.h
openmp/libomptarget/src/LegacyAPI.cpp
openmp/libomptarget/src/omptarget.cpp
openmp/libomptarget/src/private.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index e81934f083705..7f854ac5b4f9f 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -22,11 +22,11 @@
#include <mutex>
#include <set>
#include <thread>
-#include <vector>
#include "ExclusiveAccess.h"
#include "omptarget.h"
#include "rtl.h"
+#include "llvm/ADT/SmallVector.h"
// Forward declarations.
struct RTLInfoTy;
@@ -484,14 +484,14 @@ struct PluginManager {
std::list<std::pair<__tgt_device_image, __tgt_image_info>> Images;
/// Devices associated with RTLs
- std::vector<std::unique_ptr<DeviceTy>> Devices;
+ llvm::SmallVector<std::unique_ptr<DeviceTy>> Devices;
std::mutex RTLsMtx; ///< For RTLs and Devices
/// Translation table retreived from the binary
HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
std::mutex TrlTblMtx; ///< For Translation Table
/// Host offload entries in order of image registration
- std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
+ llvm::SmallVector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
/// Map from ptrs on the host to an entry in the Translation Table
HostPtrToTableMapTy HostPtrToTableMap;
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
index 5e755393330c7..8d06344f091c1 100644
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -14,11 +14,12 @@
#define _OMPTARGET_RTL_H
#include "omptarget.h"
+#include "llvm/ADT/SmallVector.h"
+
#include <list>
#include <map>
#include <mutex>
#include <string>
-#include <vector>
// Forward declarations.
struct DeviceTy;
@@ -137,7 +138,7 @@ struct RTLsTy {
// Array of pointers to the detected runtime libraries that have compatible
// binaries.
- std::vector<RTLInfoTy *> UsedRTLs;
+ llvm::SmallVector<RTLInfoTy *> UsedRTLs;
int64_t RequiresFlags = OMP_REQ_UNDEFINED;
@@ -172,10 +173,12 @@ struct TranslationTable {
__tgt_target_table HostTable;
// Image assigned to a given device.
- std::vector<__tgt_device_image *> TargetsImages; // One image per device ID.
+ llvm::SmallVector<__tgt_device_image *>
+ TargetsImages; // One image per device ID.
// Table of entry points or NULL if it was not already computed.
- std::vector<__tgt_target_table *> TargetsTable; // One table per device ID.
+ llvm::SmallVector<__tgt_target_table *>
+ TargetsTable; // One table per device ID.
};
typedef std::map<__tgt_offload_entry *, TranslationTable>
HostEntriesBeginToTransTableTy;
diff --git a/openmp/libomptarget/src/LegacyAPI.cpp b/openmp/libomptarget/src/LegacyAPI.cpp
index 5edbc919f8b21..d963be9d23059 100644
--- a/openmp/libomptarget/src/LegacyAPI.cpp
+++ b/openmp/libomptarget/src/LegacyAPI.cpp
@@ -173,8 +173,8 @@ EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
LoopTripcount);
PM->TblMapMtx.lock();
- PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
- LoopTripcount);
+ PM->Devices[DeviceId]->LoopTripCnt[__kmpc_global_thread_num(NULL)] =
+ LoopTripcount;
PM->TblMapMtx.unlock();
}
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index c33a3edc39891..4615331948d1a 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -20,6 +20,8 @@
#include <cstdint>
#include <vector>
+using llvm::SmallVector;
+
int AsyncInfoTy::synchronize() {
int Result = OFFLOAD_SUCCESS;
if (AsyncInfo.Queue) {
@@ -384,11 +386,11 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
// Construct new arrays for args_base, args, arg_sizes and arg_types
// using the information in MapperComponents and call the corresponding
// targetData* function using these new arrays.
- std::vector<void *> MapperArgsBase(MapperComponents.Components.size());
- std::vector<void *> MapperArgs(MapperComponents.Components.size());
- std::vector<int64_t> MapperArgSizes(MapperComponents.Components.size());
- std::vector<int64_t> MapperArgTypes(MapperComponents.Components.size());
- std::vector<void *> MapperArgNames(MapperComponents.Components.size());
+ SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
+ SmallVector<void *> MapperArgs(MapperComponents.Components.size());
+ SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
+ SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
+ SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
auto &C = MapperComponents.Components[I];
@@ -679,7 +681,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
int Ret;
- std::vector<PostProcessingInfo> PostProcessingPtrs;
+ SmallVector<PostProcessingInfo> PostProcessingPtrs;
void *FromMapperBase = nullptr;
// process each input.
for (int32_t I = ArgNum - 1; I >= 0; --I) {
@@ -883,7 +885,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// If the struct is to be deallocated, remove the shadow entry.
if (Info.DelEntry) {
DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
- Itr = Device.ShadowPtrMap.erase(Itr);
+ auto OldItr = Itr;
+ Itr++;
+ Device.ShadowPtrMap.erase(OldItr);
} else {
++Itr;
}
@@ -1171,12 +1175,12 @@ class PrivateArgumentManagerTy {
};
/// A vector of target pointers for all private arguments
- std::vector<void *> TgtPtrs;
+ SmallVector<void *> TgtPtrs;
/// A vector of information of all first-private arguments to be packed
- std::vector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
+ SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
/// Host buffer for all arguments to be packed
- std::vector<char> FirstPrivateArgBuffer;
+ SmallVector<char> FirstPrivateArgBuffer;
/// The total size of all arguments to be packed
int64_t FirstPrivateArgSize = 0;
@@ -1255,7 +1259,7 @@ class PrivateArgumentManagerTy {
/// Pack first-private arguments, replace place holder pointers in \p TgtArgs,
/// and start the transfer.
- int packAndTransfer(std::vector<void *> &TgtArgs) {
+ int packAndTransfer(SmallVector<void *> &TgtArgs) {
if (!FirstPrivateArgInfo.empty()) {
assert(FirstPrivateArgSize != 0 &&
"FirstPrivateArgSize is 0 but FirstPrivateArgInfo is empty");
@@ -1323,8 +1327,8 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
int32_t ArgNum, void **ArgBases, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers,
- std::vector<void *> &TgtArgs,
- std::vector<ptrdiff_t> &TgtOffsets,
+ SmallVector<void *> &TgtArgs,
+ SmallVector<ptrdiff_t> &TgtOffsets,
PrivateArgumentManagerTy &PrivateArgumentManager,
AsyncInfoTy &AsyncInfo) {
TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
@@ -1337,7 +1341,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
}
// List of (first-)private arrays allocated for this target region
- std::vector<int> TgtArgsPositions(ArgNum, -1);
+ SmallVector<int> TgtArgsPositions(ArgNum, -1);
for (int32_t I = 0; I < ArgNum; ++I) {
if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
@@ -1521,8 +1525,8 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
// begin addresses, not bases. That's why we pass args and offsets as two
// separate entities so that each plugin can do what it needs. This behavior
// was introdued via https://reviews.llvm.org/D33028 and commit 1546d319244c.
- std::vector<void *> TgtArgs;
- std::vector<ptrdiff_t> TgtOffsets;
+ SmallVector<void *> TgtArgs;
+ SmallVector<ptrdiff_t> TgtOffsets;
PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
@@ -1547,11 +1551,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
TIMESCOPE_WITH_NAME_AND_IDENT(
IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc);
if (IsTeamConstruct)
- Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+ Ret = Device.runTeamRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
TgtArgs.size(), TeamNum, ThreadLimit,
Tripcount, AsyncInfo);
else
- Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+ Ret = Device.runRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
TgtArgs.size(), AsyncInfo);
}
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 6be4a7f0e0909..2fe7c6337a213 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -67,7 +67,7 @@ struct MapComponentInfoTy {
// components are dynamically decided, so we utilize C++ STL vector
// implementation here.
struct MapperComponentsTy {
- std::vector<MapComponentInfoTy> Components;
+ llvm::SmallVector<MapComponentInfoTy> Components;
int32_t size() { return Components.size(); }
};
More information about the Openmp-commits
mailing list