[llvm] [Offload] Full AMD support for olMemFill (PR #154958)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 07:17:44 PDT 2025
================
@@ -2611,26 +2629,73 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Error dataFillImpl(void *TgtPtr, const void *PatternPtr, int64_t PatternSize,
int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
- hsa_status_t Status;
+ // Fast case, where we can use the 4 byte hsa_amd_memory_fill
+ if (Size % 4 == 0 &&
+ (PatternSize == 4 || PatternSize == 2 || PatternSize == 1)) {
+ uint32_t Pattern;
+ if (PatternSize == 1) {
+ auto *Byte = reinterpret_cast<const uint8_t *>(PatternPtr);
+ Pattern = *Byte | *Byte << 8 | *Byte << 16 | *Byte << 24;
+ } else if (PatternSize == 2) {
+ auto *Word = reinterpret_cast<const uint16_t *>(PatternPtr);
+ Pattern = *Word | (*Word << 16);
+ } else if (PatternSize == 4) {
+ Pattern = *reinterpret_cast<const uint32_t *>(PatternPtr);
+ } else {
+ // Shouldn't be here if the pattern size is outwith those values
+ std::terminate();
+ }
- // We can use hsa_amd_memory_fill for this size, but it's not async so the
- // queue needs to be synchronized first
- if (PatternSize == 4) {
- if (AsyncInfoWrapper.hasQueue())
- if (auto Err = synchronize(AsyncInfoWrapper))
+ if (hasPendingWorkImpl(AsyncInfoWrapper)) {
+ AMDGPUStreamTy *Stream = nullptr;
+ if (auto Err = getStream(AsyncInfoWrapper, Stream))
return Err;
- Status = hsa_amd_memory_fill(TgtPtr,
- *static_cast<const uint32_t *>(PatternPtr),
- Size / PatternSize);
- if (auto Err =
- Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n"))
- return Err;
- } else {
- // TODO: Implement for AMDGPU. Most likely by doing the fill in pinned
- // memory and copying to the device in one go.
- return Plugin::error(ErrorCode::UNSUPPORTED, "Unsupported fill size");
+ struct MemFillArgsTy {
+ void *Dst;
+ uint32_t Pattern;
+ int64_t Size;
+ };
+ auto *Args = new MemFillArgsTy{TgtPtr, Pattern, Size / 4};
+ auto Fill = [](void *Data) {
+ MemFillArgsTy *Args = reinterpret_cast<MemFillArgsTy *>(Data);
+ assert(Args && "Invalid arguments");
+
+ auto Status =
+ hsa_amd_memory_fill(Args->Dst, Args->Pattern, Args->Size);
+ delete Args;
+ auto Err =
+ Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n");
+ if (Err) {
+ FATAL_MESSAGE(1, "error performing async fill: %s",
+ toString(std::move(Err)).data());
+ }
----------------
jhuber6 wrote:
We should try as hard as possible not to just roll over and die (i.e. abort the whole process with a fatal error) inside of the plugin. We don't do a great job of that so far.
https://github.com/llvm/llvm-project/pull/154958
More information about the llvm-commits mailing list