[libc-commits] [libc] [libc] Efficiently implement 'realloc' for AMDGPU devices (PR #145960)
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Fri Jun 27 12:36:24 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/145960
>From 797fbf03c7bc547e0a2f054e4da522d1eb09fdec Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 26 Jun 2025 15:05:54 -0500
Subject: [PATCH 1/3] [libc] Efficiently implement 'realloc' for AMDGPU devices
Summary:
Now that we have `malloc` we can implement `realloc` efficiently. This
uses the known chunk sizes to avoid unnecessary allocations. We just
return nullptr for NVPTX. I'd remove the entrypoint from the list, but
then the libc++ code would stop working. Once someone writes the NVPTX
allocator support, implementing `realloc` there will be trivial.
---
libc/src/__support/GPU/allocator.cpp | 22 ++++++++++
libc/src/__support/GPU/allocator.h | 1 +
libc/src/stdlib/gpu/realloc.cpp | 18 +++-----
.../integration/src/stdlib/gpu/CMakeLists.txt | 15 +++++++
.../integration/src/stdlib/gpu/realloc.cpp | 44 +++++++++++++++++++
5 files changed, 89 insertions(+), 11 deletions(-)
create mode 100644 libc/test/integration/src/stdlib/gpu/realloc.cpp
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 5ea27a9c44b66..7a4a342e20b0f 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -22,6 +22,7 @@
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/threads/sleep.h"
+#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
@@ -550,5 +551,26 @@ void deallocate(void *ptr) {
release_slab(slab);
}
+/// Resizes the allocation at \p ptr so that it can hold at least \p size bytes.
+///
+/// - A null \p ptr behaves exactly like gpu::allocate(size).
+/// - A pointer that is not a slab allocation is treated as foreign and the
+///   call fails with nullptr.
+/// - If the chunk already backing \p ptr is large enough, \p ptr is returned
+///   unchanged and no allocation happens.
+/// - Otherwise a new chunk is allocated, the old chunk's contents are copied
+///   into it and the old chunk is released.
+///
+/// \returns the (possibly moved) allocation, or nullptr on failure. On failure
+/// the original allocation is left untouched and remains owned by the caller.
+void *reallocate(void *ptr, uint64_t size) {
+  if (ptr == nullptr)
+    return gpu::allocate(size);
+
+  // Non-slab allocations are considered foreign pointers so we fail.
+  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
+    return nullptr;
+
+  // The original slab pointer is the 2MiB boundary using the given pointer.
+  Slab *slab = reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
+
+  // Every chunk in a slab has the same size, so this is the usable capacity
+  // of the existing allocation. Reuse it whenever it already fits.
+  const auto chunk_size = slab->get_chunk_size();
+  if (chunk_size >= size)
+    return ptr;
+
+  // If we need a new chunk we reallocate and copy it over.
+  void *new_ptr = gpu::allocate(size);
+
+  // Allocation failure must not destroy the caller's data: bail out before
+  // touching the old chunk so 'ptr' stays valid, as 'realloc' requires.
+  if (new_ptr == nullptr)
+    return nullptr;
+
+  // The new chunk is strictly larger than the old one here, so copying the
+  // whole old chunk never overruns the destination.
+  inline_memcpy(new_ptr, ptr, chunk_size);
+  gpu::deallocate(ptr);
+  return new_ptr;
+}
+
} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/GPU/allocator.h b/libc/src/__support/GPU/allocator.h
index 466009aa71981..757f3a406015b 100644
--- a/libc/src/__support/GPU/allocator.h
+++ b/libc/src/__support/GPU/allocator.h
@@ -17,6 +17,7 @@ namespace gpu {
void *allocate(uint64_t size);
void deallocate(void *ptr);
+void *reallocate(void *ptr, uint64_t size);
} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/gpu/realloc.cpp b/libc/src/stdlib/gpu/realloc.cpp
index 4fd4d6b278179..d4929014a4b95 100644
--- a/libc/src/stdlib/gpu/realloc.cpp
+++ b/libc/src/stdlib/gpu/realloc.cpp
@@ -16,17 +16,13 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
- if (ptr == nullptr)
- return gpu::allocate(size);
-
- void *newmem = gpu::allocate(size);
- if (newmem == nullptr)
- return nullptr;
-
- // This will copy garbage if it goes beyond the old allocation size.
- inline_memcpy(newmem, ptr, size);
- gpu::deallocate(ptr);
- return newmem;
+ // FIXME: NVPTX targets currently use the built-in 'malloc', which we cannot
+ // reason about, so this always returns nullptr there; kept for compatibility.
+#ifndef LIBC_TARGET_ARCH_IS_NVPTX
+ return gpu::reallocate(ptr, size);
+#else
+ return nullptr;
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
index 26c877b1b6ae6..69e1909fe78ed 100644
--- a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
+++ b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
@@ -17,6 +17,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
--blocks 1024
)
+ add_integration_test(
+ realloc
+ SUITE
+ stdlib-gpu-integration-tests
+ SRCS
+ realloc.cpp
+ DEPENDS
+ libc.src.stdlib.malloc
+ libc.src.stdlib.free
+ libc.src.stdlib.realloc
+ LOADER_ARGS
+ --threads 256
+ --blocks 1024
+ )
+
add_integration_test(
malloc_stress
SUITE
diff --git a/libc/test/integration/src/stdlib/gpu/realloc.cpp b/libc/test/integration/src/stdlib/gpu/realloc.cpp
new file mode 100644
index 0000000000000..40b261ff872b9
--- /dev/null
+++ b/libc/test/integration/src/stdlib/gpu/realloc.cpp
@@ -0,0 +1,44 @@
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/stdlib/free.h"
+#include "src/stdlib/malloc.h"
+#include "src/stdlib/realloc.h"
+
+using namespace LIBC_NAMESPACE;
+
+// Integration test for the GPU 'realloc' entrypoint. Each thread exercises
+// the null-pointer, same-size, shrinking, growing, and divergent-size paths
+// and releases everything it allocated.
+TEST_MAIN(int, char **, char **) {
+  // realloc(nullptr, size) is equivalent to malloc.
+  int *alloc = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(nullptr, 32));
+  EXPECT_NE(alloc, nullptr);
+  *alloc = 42;
+  EXPECT_EQ(*alloc, 42);
+
+  // realloc to same size returns the same pointer.
+  void *same = LIBC_NAMESPACE::realloc(alloc, 32);
+  EXPECT_NE(same, nullptr);
+  EXPECT_EQ(same, static_cast<void *>(alloc));
+  EXPECT_EQ(reinterpret_cast<int *>(same)[0], 42);
+
+  // realloc to smaller size returns same pointer.
+  void *smaller = LIBC_NAMESPACE::realloc(same, 16);
+  EXPECT_NE(smaller, nullptr);
+  EXPECT_EQ(smaller, same);
+  EXPECT_EQ(reinterpret_cast<int *>(smaller)[0], 42);
+
+  // realloc to larger size returns new pointer and preserves contents.
+  int *larger = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(smaller, 128));
+  EXPECT_NE(larger, nullptr);
+  EXPECT_EQ(larger[0], 42);
+  // 'larger' now owns the only live allocation from the chain above.
+  LIBC_NAMESPACE::free(larger);
+
+  // realloc works when called with a divergent size.
+  int *div = reinterpret_cast<int *>(
+      LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
+  EXPECT_NE(div, nullptr);
+  div[0] = static_cast<int>(gpu::get_thread_id());
+  int *div_realloc = reinterpret_cast<int *>(
+      LIBC_NAMESPACE::realloc(div, ((gpu::get_thread_id() + 1) * 32)));
+  EXPECT_NE(div_realloc, nullptr);
+  EXPECT_EQ(div_realloc[0], static_cast<int>(gpu::get_thread_id()));
+  LIBC_NAMESPACE::free(div_realloc);
+
+  return 0;
+}
>From e77c6fa7a4a2e1b16c1411e63ceb9e81a00ad330 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 26 Jun 2025 17:26:49 -0500
Subject: [PATCH 2/3] Fix NVPTX error
---
libc/src/stdlib/gpu/realloc.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/src/stdlib/gpu/realloc.cpp b/libc/src/stdlib/gpu/realloc.cpp
index d4929014a4b95..97ad1b3bbeff2 100644
--- a/libc/src/stdlib/gpu/realloc.cpp
+++ b/libc/src/stdlib/gpu/realloc.cpp
@@ -21,6 +21,8 @@ LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
#ifndef LIBC_TARGET_ARCH_IS_NVPTX
return gpu::reallocate(ptr, size);
#else
+ (void)ptr;
+ (void)size;
return nullptr;
#endif
}
>From 0153feb1ec9a18a3bb61a9b869f591ecc57ddf70 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 27 Jun 2025 14:36:10 -0500
Subject: [PATCH 3/3] launder
---
libc/src/__support/GPU/allocator.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 7a4a342e20b0f..b2f2953e4f285 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -560,8 +560,8 @@ void *reallocate(void *ptr, uint64_t size) {
return nullptr;
// The original slab pointer is the 2MiB boundary using the given pointer.
- Slab *slab = reinterpret_cast<Slab *>(
- (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
+ Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
+ (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
if (slab->get_chunk_size() >= size)
return ptr;
More information about the libc-commits
mailing list