[libc-commits] [libc] [libc] Efficiently implement 'realloc' for AMDGPU devices (PR #145960)

Fri Jun 27 12:36:24 PDT 2025

https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/145960

>From 797fbf03c7bc547e0a2f054e4da522d1eb09fdec Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 26 Jun 2025 15:05:54 -0500
Subject: [PATCH 1/3] [libc] Efficiently implement 'realloc' for AMDGPU devices

Summary:
Now that we have `malloc` we can implement `realloc` efficiently. This
uses the known chunk sizes to avoid unnecessary allocations. We just
return nullptr for NVPTX. I'd remove the entrypoint from the list, but
then the libc++ code would stop working. When someone writes the NVPTX
support this will be trivial.
---
 libc/src/__support/GPU/allocator.cpp          | 22 ++++++++++
 libc/src/__support/GPU/allocator.h            |  1 +
 libc/src/stdlib/gpu/realloc.cpp               | 18 +++-----
 .../integration/src/stdlib/gpu/CMakeLists.txt | 15 +++++++
 .../integration/src/stdlib/gpu/realloc.cpp    | 44 +++++++++++++++++++
 5 files changed, 89 insertions(+), 11 deletions(-)
 create mode 100644 libc/test/integration/src/stdlib/gpu/realloc.cpp

diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 5ea27a9c44b66..7a4a342e20b0f 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -22,6 +22,7 @@
 #include "src/__support/GPU/utils.h"
 #include "src/__support/RPC/rpc_client.h"
 #include "src/__support/threads/sleep.h"
+#include "src/string/memory_utils/inline_memcpy.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -550,5 +551,34 @@ void deallocate(void *ptr) {
   release_slab(slab);
 }
 
+// Resizes the allocation at `ptr` so it can hold at least `size` bytes.
+// A null `ptr` behaves like `gpu::allocate(size)`. Pointers that fail the slab
+// alignment test are treated as foreign and yield nullptr. If the chunk size
+// of the owning slab already covers `size`, `ptr` is returned unchanged.
+// Otherwise the old chunk is copied into a new allocation and then released.
+// Returns nullptr and leaves `ptr` untouched if the new allocation fails.
+void *reallocate(void *ptr, uint64_t size) {
+  if (ptr == nullptr)
+    return gpu::allocate(size);
+
+  // Non-slab allocations are considered foreign pointers so we fail.
+  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
+    return nullptr;
+
+  // The original slab pointer is the 2MiB boundary using the given pointer.
+  Slab *slab = reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
+  if (slab->get_chunk_size() >= size)
+    return ptr;
+
+  // If we need a new chunk we reallocate and copy it over.
+  void *new_ptr = gpu::allocate(size);
+  // Do not copy into or free anything if the allocation failed.
+  if (new_ptr == nullptr)
+    return nullptr;
+  inline_memcpy(new_ptr, ptr, slab->get_chunk_size());
+  gpu::deallocate(ptr);
+  return new_ptr;
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/GPU/allocator.h b/libc/src/__support/GPU/allocator.h
index 466009aa71981..757f3a406015b 100644
--- a/libc/src/__support/GPU/allocator.h
+++ b/libc/src/__support/GPU/allocator.h
@@ -17,6 +17,7 @@ namespace gpu {
 
 void *allocate(uint64_t size);
 void deallocate(void *ptr);
+void *reallocate(void *ptr, uint64_t size);
 
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/gpu/realloc.cpp b/libc/src/stdlib/gpu/realloc.cpp
index 4fd4d6b278179..d4929014a4b95 100644
--- a/libc/src/stdlib/gpu/realloc.cpp
+++ b/libc/src/stdlib/gpu/realloc.cpp
@@ -16,17 +16,13 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
-  if (ptr == nullptr)
-    return gpu::allocate(size);
-
-  void *newmem = gpu::allocate(size);
-  if (newmem == nullptr)
-    return nullptr;
-
-  // This will copy garbage if it goes beyond the old allocation size.
-  inline_memcpy(newmem, ptr, size);
-  gpu::deallocate(ptr);
-  return newmem;
+  // FIXME: NVIDIA targets currently use the built-in 'malloc', which we cannot
+  // reason about, but we still need to provide this function for compatibility.
+#ifndef LIBC_TARGET_ARCH_IS_NVPTX
+  return gpu::reallocate(ptr, size);
+#else
+  return nullptr;
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
index 26c877b1b6ae6..69e1909fe78ed 100644
--- a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
+++ b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
@@ -17,6 +17,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
       --blocks 1024
   )
 
+  add_integration_test( # Integration test for the GPU 'realloc' entrypoint.
+    realloc
+    SUITE
+      stdlib-gpu-integration-tests
+    SRCS
+      realloc.cpp
+    DEPENDS
+      libc.src.stdlib.malloc
+      libc.src.stdlib.free
+      libc.src.stdlib.realloc
+    LOADER_ARGS
+      --threads 256
+      --blocks 1024
+  )
+
   add_integration_test(
     malloc_stress
     SUITE
diff --git a/libc/test/integration/src/stdlib/gpu/realloc.cpp b/libc/test/integration/src/stdlib/gpu/realloc.cpp
new file mode 100644
index 0000000000000..40b261ff872b9
--- /dev/null
+++ b/libc/test/integration/src/stdlib/gpu/realloc.cpp
@@ -0,0 +1,48 @@
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/stdlib/free.h"
+#include "src/stdlib/malloc.h"
+#include "src/stdlib/realloc.h"
+
+using namespace LIBC_NAMESPACE;
+
+// Exercises 'realloc' for fresh, in-place, growing, and divergent requests.
+TEST_MAIN(int, char **, char **) {
+  // realloc(nullptr, size) is equivalent to malloc.
+  int *alloc = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(nullptr, 32));
+  EXPECT_NE(alloc, nullptr);
+  *alloc = 42;
+  EXPECT_EQ(*alloc, 42);
+
+  // realloc to same size returns the same pointer.
+  void *same = LIBC_NAMESPACE::realloc(alloc, 32);
+  EXPECT_NE(same, nullptr);
+  EXPECT_EQ(same, static_cast<void *>(alloc));
+  EXPECT_EQ(reinterpret_cast<int *>(same)[0], 42);
+
+  // realloc to smaller size returns same pointer.
+  void *smaller = LIBC_NAMESPACE::realloc(same, 16);
+  EXPECT_NE(smaller, nullptr);
+  EXPECT_EQ(smaller, same);
+  EXPECT_EQ(reinterpret_cast<int *>(smaller)[0], 42);
+
+  // realloc to larger size preserves contents, whether or not the block moves.
+  int *larger = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(smaller, 128));
+  EXPECT_NE(larger, nullptr);
+  EXPECT_EQ(larger[0], 42);
+  LIBC_NAMESPACE::free(larger);
+
+  // realloc works when called with a divergent size.
+  int *div = reinterpret_cast<int *>(
+      LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
+  EXPECT_NE(div, nullptr);
+  div[0] = static_cast<int>(gpu::get_thread_id());
+  int *div_realloc = reinterpret_cast<int *>(
+      LIBC_NAMESPACE::realloc(div, ((gpu::get_thread_id() + 1) * 32)));
+  EXPECT_NE(div_realloc, nullptr);
+  EXPECT_EQ(div_realloc[0], static_cast<int>(gpu::get_thread_id()));
+  LIBC_NAMESPACE::free(div_realloc);
+
+  return 0;
+}

>From e77c6fa7a4a2e1b16c1411e63ceb9e81a00ad330 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 26 Jun 2025 17:26:49 -0500
Subject: [PATCH 2/3] Fix NVPTX error

---
 libc/src/stdlib/gpu/realloc.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libc/src/stdlib/gpu/realloc.cpp b/libc/src/stdlib/gpu/realloc.cpp
index d4929014a4b95..97ad1b3bbeff2 100644
--- a/libc/src/stdlib/gpu/realloc.cpp
+++ b/libc/src/stdlib/gpu/realloc.cpp
@@ -21,6 +21,8 @@ LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
 #ifndef LIBC_TARGET_ARCH_IS_NVPTX
   return gpu::reallocate(ptr, size);
 #else
+  (void)ptr;
+  (void)size;
   return nullptr;
 #endif
 }

>From 0153feb1ec9a18a3bb61a9b869f591ecc57ddf70 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 27 Jun 2025 14:36:10 -0500
Subject: [PATCH 3/3] launder

---
 libc/src/__support/GPU/allocator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 7a4a342e20b0f..b2f2953e4f285 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -560,8 +560,8 @@ void *reallocate(void *ptr, uint64_t size) {
     return nullptr;
 
   // The original slab pointer is the 2MiB boundary using the given pointer.
-  Slab *slab = reinterpret_cast<Slab *>(
-      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
+  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));
   if (slab->get_chunk_size() >= size)
     return ptr;