[Openmp-commits] [openmp] 13a0b48 - [OpenMP][libomptarget][AMDGPU] Update print launch info
JP Lehr via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 15 03:12:53 PDT 2023
Author: JP Lehr
Date: 2023-03-15T06:11:01-04:00
New Revision: 13a0b48f37250ad2f8cb72c04e72600874112037
URL: https://github.com/llvm/llvm-project/commit/13a0b48f37250ad2f8cb72c04e72600874112037
DIFF: https://github.com/llvm/llvm-project/commit/13a0b48f37250ad2f8cb72c04e72600874112037.diff
LOG: [OpenMP][libomptarget][AMDGPU] Update print launch info
Clean up for the AMD-specific kernel launch info in the NextGen Plugins.
- Fixes a mistake introduced with the initial commit that added printing
of an AMD-only property.
- Removes another AMD-only property (not clear on upstream status)
- Adds some more comment to what info is printed.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D145924
Added:
Modified:
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
openmp/libomptarget/test/offloading/info.c
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 99b45ad386d8a..e03825651286d 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2614,33 +2614,39 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
return Plugin::success();
// General Info
- auto ConstWGSize = getDefaultNumThreads(GenericDevice);
auto NumGroups = NumBlocks;
- auto ThreadsPerGroup = getDefaultNumThreads(GenericDevice);
- auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
- auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension
+ auto ThreadsPerGroup = NumThreads;
// Kernel Arguments Info
auto ArgNum = KernelArgs.NumArgs;
auto LoopTripCount = KernelArgs.Tripcount;
- // Details for AMDGPU kernels
+ // Details for AMDGPU kernels (read from image)
+ // https://www.llvm.org/docs/AMDGPUUsage.html#code-object-v4-metadata
auto GroupSegmentSize = (*KernelInfo).GroupSegmentList;
auto SGPRCount = (*KernelInfo).SGPRCount;
auto VGPRCount = (*KernelInfo).VGPRCount;
auto SGPRSpillCount = (*KernelInfo).SGPRSpillCount;
auto VGPRSpillCount = (*KernelInfo).VGPRSpillCount;
-
- // TODO set correctly once host services available
- auto HostCallRequired = false;
+ auto MaxFlatWorkgroupSize = (*KernelInfo).MaxFlatWorkgroupSize;
+
+ // Prints additional launch info that contains the following.
+ // Num Args: The number of kernel arguments
+ // Teams x Thrds: The number of teams and the number of threads actually
+ // running.
+ // MaxFlatWorkgroupSize: Maximum flat work-group size supported by the
+ // kernel in work-items
+ // LDS Usage: Amount of bytes used in LDS storage
+ // S/VGPR Count: the number of S/V GPRs occupied by the kernel
+ // S/VGPR Spill Count: how many S/VGPRs are spilled by the kernel
+ // Tripcount: loop tripcount for the kernel
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
- "SGN:%s ConstWGSize:%d args:%d teamsXthrds:(%4luX%4d) "
- "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
- "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu rpc:%d n:%s\n",
- getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup,
- NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount,
- SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired,
- getName());
+ "#Args: %d Teams x Thrds: %4lux%4u (MaxFlatWorkGroupSize: %u) LDS "
+ "Usage: %uB #SGPRs/VGPRs: %u/%u #SGPR/VGPR Spills: %u/%u Tripcount: "
+ "%lu\n",
+ ArgNum, NumGroups, ThreadsPerGroup, MaxFlatWorkgroupSize,
+ GroupSegmentSize, SGPRCount, VGPRCount, SGPRSpillCount, VGPRSpillCount,
+ LoopTripCount);
return Plugin::success();
}
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 1c549189b5c4d..cdf1d10980e9a 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -146,6 +146,10 @@ struct KernelMetaDataTy {
uint32_t KernelSegmentSize;
uint32_t ExplicitArgumentCount;
uint32_t ImplicitArgumentCount;
+ uint32_t RequestedWorkgroupSize[3];
+ uint32_t WorkgroupSizeHint[3];
+ uint32_t WavefronSize;
+ uint32_t MaxFlatWorkgroupSize;
};
namespace {
@@ -194,6 +198,19 @@ class KernelInfoReader {
return DK.getString() == SK;
};
+ const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN,
+ uint32_t *Vals) {
+ assert(DN.isArray() && "MsgPack DocNode is an array node");
+ auto DNA = DN.getArray();
+ assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+ int i = 0;
+ for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+ ++DNABegin) {
+ Vals[i++] = DNABegin->getUInt();
+ }
+ };
+
if (isKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (isKey(V.first, ".sgpr_count")) {
@@ -208,6 +225,14 @@ class KernelInfoReader {
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (isKey(V.first, ".group_segement_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
+ } else if (isKey(V.first, ".reqd_workgroup_size")) {
+ getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+ } else if (isKey(V.first, ".workgroup_size_hint")) {
+ getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+ } else if (isKey(V.first, ".wavefront_size")) {
+ KernelData.WavefronSize = V.second.getUInt();
+ } else if (isKey(V.first, ".max_flat_workgroup_size")) {
+ KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}
return Error::success();
@@ -295,6 +320,7 @@ Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
return Error::success();
}
+
} // namespace utils
} // namespace plugin
} // namespace target
diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c
index f24727400bcdb..c2f8450064106 100644
--- a/openmp/libomptarget/test/offloading/info.c
+++ b/openmp/libomptarget/test/offloading/info.c
@@ -39,7 +39,7 @@ int main() {
// INFO: info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
// INFO: info: firstprivate(val)[4]
// INFO: info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
-// AMDGPU: AMDGPU device {{[0-9]}} info: SGN:Generic ConstWGSize:{{[0-9]+}} args:{{[0-9]}} teamsXthrds:({{ [0-9]+}}X {{[0-9]+}}) reqd:( {{[0-9]+}}X {{[0-9]+}}) lds_usage:{{[0-9]+}}B sgpr_count:{{[0-9]+}} vgpr_count:{{[0-9]+}} sgpr_spill_count:{{[0-9]+}} vgpr_spill_count:{{[0-9]+}} tripcount:{{[0-9]+}} rpc:0 n:__omp_offloading_{{.*}}main{{.*}}
+// AMDGPU: AMDGPU device {{[0-9]}} info: #Args: {{[0-9]}} Teams x Thrds: {{[0-9]+}}x {{[0-9]+}} (MaxFlatWorkGroupSize: {{[0-9]+}}) LDS Usage: {{[0-9]+}}B #SGPRs/VGPRs: {{[0-9]+}}/{{[0-9]+}} #SGPR/VGPR Spills: {{[0-9]+}}/{{[0-9]+}} Tripcount: {{[0-9]+}}
// INFO: info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
// INFO: info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration
// INFO: info: {{.*}} {{.*}} 256 1 0 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
More information about the Openmp-commits
mailing list