[llvm] [Offload] Full AMD support for olMemFill (PR #154958)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 07:17:44 PDT 2025
================
@@ -2611,26 +2629,73 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Error dataFillImpl(void *TgtPtr, const void *PatternPtr, int64_t PatternSize,
int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
- hsa_status_t Status;
+ // Fast case, where we can use the 4 byte hsa_amd_memory_fill
+ if (Size % 4 == 0 &&
+ (PatternSize == 4 || PatternSize == 2 || PatternSize == 1)) {
+ uint32_t Pattern;
+ if (PatternSize == 1) {
+ auto *Byte = reinterpret_cast<const uint8_t *>(PatternPtr);
+ Pattern = *Byte | *Byte << 8 | *Byte << 16 | *Byte << 24;
+ } else if (PatternSize == 2) {
+ auto *Word = reinterpret_cast<const uint16_t *>(PatternPtr);
+ Pattern = *Word | (*Word << 16);
+ } else if (PatternSize == 4) {
+ Pattern = *reinterpret_cast<const uint32_t *>(PatternPtr);
+ } else {
+ // Shouldn't be here if the pattern size is outwith those values
+ std::terminate();
----------------
jhuber6 wrote:
Is this an assertion? Use `llvm_unreachable` for stuff like this.
https://github.com/llvm/llvm-project/pull/154958
More information about the llvm-commits mailing list