[Openmp-commits] [openmp] b34d31d - [OpenMP] Fix record-replay allocation order for kernel environment (#71863)
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Nov 9 12:51:25 PST 2023
Author: Konstantinos Parasyris
Date: 2023-11-09T12:51:22-08:00
New Revision: b34d31d2e12b3b2cf574b16275dd6aafec586a7a
URL: https://github.com/llvm/llvm-project/commit/b34d31d2e12b3b2cf574b16275dd6aafec586a7a
DIFF: https://github.com/llvm/llvm-project/commit/b34d31d2e12b3b2cf574b16275dd6aafec586a7a.diff
LOG: [OpenMP] Fix record-replay allocation order for kernel environment (#71863)
Added:
Modified:
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 117ed94a1da6ffa..b55509c2f28fffc 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -231,10 +231,9 @@ struct RecordReplayTy {
OS.close();
}
- void saveKernelInputInfo(const char *Name, DeviceImageTy &Image,
- void **ArgPtrs, ptrdiff_t *ArgOffsets,
- int32_t NumArgs, uint64_t NumTeamsClause,
- uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
+ void saveKernelDescr(const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets,
+ int32_t NumArgs, uint64_t NumTeamsClause,
+ uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
json::Object JsonKernelInfo;
JsonKernelInfo["Name"] = Name;
JsonKernelInfo["NumArgs"] = NumArgs;
@@ -255,12 +254,6 @@ struct RecordReplayTy {
JsonArgOffsets.push_back(ArgOffsets[I]);
JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));
- SmallString<128> MemoryFilename = {Name, ".memory"};
- dumpDeviceMemory(MemoryFilename);
-
- SmallString<128> GlobalsFilename = {Name, ".globals"};
- dumpGlobals(GlobalsFilename, Image);
-
SmallString<128> JsonFilename = {Name, ".json"};
std::error_code EC;
raw_fd_ostream JsonOS(JsonFilename.str(), EC);
@@ -271,6 +264,14 @@ struct RecordReplayTy {
JsonOS.close();
}
+ void saveKernelInput(const char *Name, DeviceImageTy &Image) {
+ SmallString<128> GlobalsFilename = {Name, ".globals"};
+ dumpGlobals(GlobalsFilename, Image);
+
+ SmallString<128> MemoryFilename = {Name, ".memory"};
+ dumpDeviceMemory(MemoryFilename);
+ }
+
void saveKernelOutputInfo(const char *Name) {
SmallString<128> OutputFilename = {
Name, (isRecording() ? ".original.output" : ".replay.output")};
@@ -504,12 +505,6 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
return Err;
- if (RecordReplay.isRecording())
- RecordReplay.saveKernelInputInfo(
- getName(), getImage(), ArgPtrs, ArgOffsets,
- KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1, NumBlocks,
- NumThreads, KernelArgs.Tripcount);
-
return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
KernelArgsPtr, AsyncInfoWrapper);
}
@@ -1411,12 +1406,21 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
GenericKernelTy &GenericKernel =
*reinterpret_cast<GenericKernelTy *>(EntryPtr);
- if (RecordReplay.isRecording())
+ if (RecordReplay.isRecording()) {
RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
+ RecordReplay.saveKernelInput(GenericKernel.getName(),
+ GenericKernel.getImage());
+ }
auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
AsyncInfoWrapper);
+ if (RecordReplay.isRecording())
+ RecordReplay.saveKernelDescr(GenericKernel.getName(), ArgPtrs, ArgOffsets,
+ KernelArgs.NumArgs, KernelArgs.NumTeams[0],
+ KernelArgs.ThreadLimit[0],
+ KernelArgs.Tripcount);
+
// 'finalize' here to guarantee next record-replay actions are in-sync
AsyncInfoWrapper.finalize(Err);
@@ -1845,7 +1849,8 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
int32_t DstDeviceId, void *DstPtr,
int64_t Size) {
return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
- Size, /* AsyncInfoPtr */ nullptr);
+ Size,
+ /* AsyncInfoPtr */ nullptr);
}
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
diff --git a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index 93fc3e7853f8e9c..254be7db6e01a41 100644
--- a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -147,7 +147,7 @@ int main(int argc, char **argv) {
uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
std::memcpy(recored_data,
const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
- DeviceMemorySizeJson.value() * sizeof(uint8_t));
+ DeviceMemoryMB.get()->getBufferSize());
__tgt_target_kernel_replay(
/* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
More information about the Openmp-commits
mailing list