[libc-commits] [libc] [libc][stdlib] Add EnvironmentManager (PR #195260)

Jeff Bailey via libc-commits libc-commits at lists.llvm.org
Wed May 6 00:18:28 PDT 2026


https://github.com/kaladron updated https://github.com/llvm/llvm-project/pull/195260

>From 1f4639fc8d9b5ab512d0fcb9f4f7839d8c467e2d Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Fri, 1 May 2026 11:45:33 +0100
Subject: [PATCH 1/2] [libc][stdlib] Add EnvironmentManager

Introduced an EnvironmentManager singleton that centralises environment
variable state: the environ array, per-string ownership tracking, and
capacity management. The manager exposes a minimal public API (get_instance,
get, size, and begin/end iterators) and keeps all internal state private.

Refactored getenv to delegate to EnvironmentManager::get() rather than
directly iterating app.env_ptr.

The ownership tracking and capacity management are preparatory
infrastructure for setenv.
---
 libc/src/stdlib/CMakeLists.txt       |  20 ++-
 libc/src/stdlib/environ_internal.cpp | 179 +++++++++++++++++++++++++++
 libc/src/stdlib/environ_internal.h   | 123 ++++++++++++++++++
 libc/src/stdlib/getenv.cpp           |  35 ++----
 4 files changed, 330 insertions(+), 27 deletions(-)
 create mode 100644 libc/src/stdlib/environ_internal.cpp
 create mode 100644 libc/src/stdlib/environ_internal.h

diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 4265e0de57654..1185f1a0fa461 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -62,7 +62,25 @@ add_entrypoint_object(
   HDRS
     getenv.h
   DEPENDS
-  libc.config.app_h
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    .environ_internal
+)
+
+add_object_library(
+  environ_internal
+  SRCS
+    environ_internal.cpp
+  HDRS
+    environ_internal.h
+  DEPENDS
+    libc.config.app_h
+    libc.hdr.types.size_t
+    libc.src.__support.CPP.new
+    libc.src.__support.CPP.optional
+    libc.src.__support.CPP.string_view
+    libc.src.__support.macros.attributes
+    libc.src.__support.macros.config
 )
 
 add_entrypoint_object(
diff --git a/libc/src/stdlib/environ_internal.cpp b/libc/src/stdlib/environ_internal.cpp
new file mode 100644
index 0000000000000..0dd39293d652e
--- /dev/null
+++ b/libc/src/stdlib/environ_internal.cpp
@@ -0,0 +1,179 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of internal environment management utilities.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/environ_internal.h"
+#include "config/app.h"
+#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/alloc-checker.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Minimum initial capacity for the environment array when first allocated.
+// This avoids frequent reallocations for small environments.
+constexpr size_t MIN_ENVIRON_CAPACITY = 32;
+
+// Growth factor for environment array capacity when expanding.
+// When capacity is exceeded, new_capacity = old_capacity *
+// ENVIRON_GROWTH_FACTOR.
+constexpr size_t ENVIRON_GROWTH_FACTOR = 2;
+
+void EnvironmentManager::init_once() {
+  // Idempotent: only the first call walks the startup environment.
+  if (initialized)
+    return;
+
+  // Measure the null-terminated array published through app.env_ptr. When
+  // no startup array exists, `count` is left at its current value.
+  if (char **startup_env = reinterpret_cast<char **>(app.env_ptr)) {
+    size_t entries = 0;
+    while (startup_env[entries] != nullptr)
+      ++entries;
+    count = entries;
+  }
+  initialized = true;
+}
+
+EnvironmentManager &EnvironmentManager::get_instance() {
+  static EnvironmentManager singleton; // One manager shared by all callers.
+  singleton.init_once();               // No-op after the first call.
+  return singleton;
+}
+
+char **EnvironmentManager::get_array() {
+  // Once ensure_capacity() has switched to managed storage, hand that out;
+  // until then the array is whatever app.env_ptr refers to.
+  return is_ours ? storage : reinterpret_cast<char **>(app.env_ptr);
+}
+
+EnvironmentManager::iterator EnvironmentManager::begin() { return get_array(); }
+
+EnvironmentManager::iterator EnvironmentManager::end() {
+  return begin() + count;
+}
+
+size_t EnvironmentManager::size() const { return count; }
+
+char *EnvironmentManager::get(cpp::string_view name) {
+  // A matching entry has the form "NAME=VALUE"; the result points at VALUE,
+  // i.e. just past the name and the '=' separator.
+  cpp::optional<size_t> slot = find_var(name);
+  return slot ? get_array()[*slot] + name.size() + 1 : nullptr;
+}
+
+cpp::optional<size_t> EnvironmentManager::find_var(cpp::string_view name) {
+  char **entries = get_array();
+  if (entries == nullptr)
+    return cpp::nullopt;
+  // An entry matches when it begins with `name` immediately followed by '='.
+  for (size_t pos = 0; pos < count; ++pos) {
+    cpp::string_view entry(entries[pos]);
+    if (entry.size() <= name.size() || entry[name.size()] != '=')
+      continue;
+    if (entry.starts_with(name))
+      return pos;
+  }
+  return cpp::nullopt;
+}
+
+// Helper: allocate storage and ownership arrays sized for `new_capacity`
+// entries plus a terminator slot, and copy the first `copy_count` entries
+// (a null source yields nullptr / not-owned entries). Returns false on
+// allocation failure; the old arrays and out-parameters are then untouched.
+bool EnvironmentManager::alloc_and_copy(size_t new_capacity, char **old_storage,
+                                        EnvStringOwnership *old_ownership,
+                                        size_t copy_count, char **&out_storage,
+                                        EnvStringOwnership *&out_ownership) {
+  const size_t slots = new_capacity + 1; // Room for the null terminator.
+  AllocChecker ac;
+  char **grown = new (ac) char *[slots];
+  if (!ac)
+    return false;
+
+  EnvStringOwnership *owners = new (ac) EnvStringOwnership[slots];
+  if (!ac) {
+    delete[] grown; // Do not leak the first array.
+    return false;
+  }
+
+  for (size_t i = 0; i < copy_count; ++i) {
+    grown[i] = old_storage ? old_storage[i] : nullptr;
+    owners[i] = old_ownership ? old_ownership[i] : EnvStringOwnership();
+  }
+  grown[copy_count] = nullptr;
+
+  out_storage = grown;
+  out_ownership = owners;
+  return true;
+}
+
+bool EnvironmentManager::ensure_capacity(size_t needed) {
+  // Never size below the live entry count: alloc_and_copy() copies `count`
+  // entries plus a terminator, so a smaller array would be overrun.
+  if (needed < count)
+    needed = count;
+
+  // If we're still using the startup environ (pointed to by app.env_ptr),
+  // we must transition to our own managed storage. This allows us to
+  // track ownership of strings and safely expand the array.
+  if (!is_ours) {
+    char **old_env = reinterpret_cast<char **>(app.env_ptr);
+    // Allocate new array with room to grow.
+    size_t new_capacity = needed < MIN_ENVIRON_CAPACITY
+                              ? MIN_ENVIRON_CAPACITY
+                              : needed * ENVIRON_GROWTH_FACTOR;
+
+    char **new_storage = nullptr;
+    EnvStringOwnership *new_ownership = nullptr;
+    if (!alloc_and_copy(new_capacity, old_env, nullptr, count, new_storage,
+                        new_ownership))
+      return false;
+
+    storage = new_storage;
+    ownership = new_ownership;
+    capacity = new_capacity;
+    is_ours = true;
+    // Update the global environ pointer.
+    app.env_ptr = reinterpret_cast<uintptr_t *>(storage);
+    return true;
+  }
+
+  // We already own the environment array. Check if it's large enough.
+  if (needed <= capacity)
+    return true;
+
+  // Grow capacity. We avoid realloc to ensure that failures don't leave the
+  // manager in an inconsistent state.
+  size_t new_capacity = needed * ENVIRON_GROWTH_FACTOR;
+
+  char **new_storage = nullptr;
+  EnvStringOwnership *new_ownership = nullptr;
+  if (!alloc_and_copy(new_capacity, storage, ownership, count, new_storage,
+                      new_ownership))
+    return false;
+
+  delete[] storage;
+  delete[] ownership;
+
+  storage = new_storage;
+  ownership = new_ownership;
+  capacity = new_capacity;
+  // Update the global environ pointer.
+  app.env_ptr = reinterpret_cast<uintptr_t *>(storage);
+  return true;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/environ_internal.h b/libc/src/stdlib/environ_internal.h
new file mode 100644
index 0000000000000..1ce214dd0f439
--- /dev/null
+++ b/libc/src/stdlib/environ_internal.h
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Internal utilities for environment variable management.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H
+#define LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H
+
+#include "hdr/types/size_t.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Ownership information for environment strings.
+// We need to track ownership because environment strings come from three
+// sources:
+// 1. Startup environment (from program loader) - we don't own these
+// 2. putenv() calls where caller provides the string - we don't own these
+// 3. setenv() calls where we allocate the string - we DO own these
+// Only strings we allocated can be freed when replaced or removed.
+struct EnvStringOwnership {
+  bool allocated_by_us; // True if we allocated this string (setenv) and must
+                        // delete it when it is replaced or removed. False for
+                        // startup environ or putenv strings (never freed).
+
+  // Default state: not owned by us (startup or putenv), so never freed.
+  LIBC_INLINE EnvStringOwnership() : allocated_by_us(false) {}
+
+  // Returns true only for strings we allocated, which are safe to free.
+  LIBC_INLINE bool can_free() const { return allocated_by_us; }
+};
+
+// Centralized manager for environment variable operations.
+// This class encapsulates all state and operations related to environment
+// management, including memory management and tracking of string ownership.
+//
+// The manager provides iterator support, allowing callers to iterate over
+// the current environment entries using standard begin()/end() semantics.
+class EnvironmentManager {
+  // Our allocated environ array; nullptr while using the startup environ.
+  char **storage = nullptr;
+
+  // Parallel array tracking ownership of each environ string.
+  // Allocated with the same capacity as storage.
+  EnvStringOwnership *ownership = nullptr;
+
+  // Entry capacity of storage (0 until we allocate our own array).
+  size_t capacity = 0;
+
+  // Current number of variables in environ, excluding the null terminator.
+  size_t count = 0;
+
+  // True once init_once() has counted the startup environment.
+  bool initialized = false;
+
+  // True if we allocated storage (and are responsible for freeing it)
+  bool is_ours = false;
+
+  EnvironmentManager() = default;
+  ~EnvironmentManager() = default;
+
+  // Lazily initialize from the startup environment.
+  // Called internally by get_instance(); idempotent.
+  void init_once();
+
+  // Get a pointer to the current environ array.
+  // This may be app.env_ptr (startup environ) or storage (our copy).
+  char **get_array();
+
+  // Search the current environ array for an entry of the form `name=...`.
+  // Returns the index of the first match, or nullopt if there is none.
+  cpp::optional<size_t> find_var(cpp::string_view name);
+
+  // Ensure environ has capacity for at least `needed` entries (plus null
+  // terminator). May allocate or reallocate storage. Returns true on
+  // success, false on allocation failure.
+  bool ensure_capacity(size_t needed);
+
+  // Helper: allocate new storage and ownership arrays of the given capacity,
+  // copy the first `copy_count` entries from old_storage/old_ownership, and
+  // leave the remaining ownership slots default-initialized (not-owned).
+  // Returns false on allocation failure without modifying the out-parameters.
+  bool alloc_and_copy(size_t new_capacity, char **old_storage,
+                      EnvStringOwnership *old_ownership, size_t copy_count,
+                      char **&out_storage, EnvStringOwnership *&out_ownership);
+
+public:
+  // Get the singleton instance of the environment manager.
+  static EnvironmentManager &get_instance();
+
+  // Delete copy and move operations to enforce singleton pattern.
+  EnvironmentManager(const EnvironmentManager &) = delete;
+  EnvironmentManager &operator=(const EnvironmentManager &) = delete;
+  EnvironmentManager(EnvironmentManager &&) = delete;
+  EnvironmentManager &operator=(EnvironmentManager &&) = delete;
+
+  // Iterator support: [begin(), end()) spans the size() current entries.
+  using iterator = char **;
+  iterator begin();
+  iterator end();
+  size_t size() const;
+
+  // Look up a variable by name. Returns a pointer to the value string
+  // (after the '=') inside the stored entry, or nullptr if not found.
+  char *get(cpp::string_view name);
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_ENVIRON_INTERNAL_H
diff --git a/libc/src/stdlib/getenv.cpp b/libc/src/stdlib/getenv.cpp
index e6ef03fad5c51..32a75122e7de0 100644
--- a/libc/src/stdlib/getenv.cpp
+++ b/libc/src/stdlib/getenv.cpp
@@ -1,45 +1,28 @@
-//===-- Implementation of getenv ------------------------------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of the POSIX getenv function.
+///
+//===----------------------------------------------------------------------===//
 
 #include "src/stdlib/getenv.h"
-#include "config/app.h"
-#include "src/__support/CPP/string_view.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-
-#include <stddef.h> // For size_t.
+#include "src/stdlib/environ_internal.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(char *, getenv, (const char *name)) {
-  char **env_ptr = reinterpret_cast<char **>(LIBC_NAMESPACE::app.env_ptr);
-
-  if (name == nullptr || env_ptr == nullptr)
+  if (name == nullptr || name[0] == '\0')
     return nullptr;
 
-  LIBC_NAMESPACE::cpp::string_view env_var_name(name);
-  if (env_var_name.size() == 0)
-    return nullptr;
-  for (char **env = env_ptr; *env != nullptr; env++) {
-    LIBC_NAMESPACE::cpp::string_view cur(*env);
-    if (!cur.starts_with(env_var_name))
-      continue;
-
-    if (cur[env_var_name.size()] != '=')
-      continue;
-
-    // Remove the name and the equals sign.
-    cur.remove_prefix(env_var_name.size() + 1);
-    // We know that data is null terminated, so this is safe.
-    return const_cast<char *>(cur.data());
-  }
-
-  return nullptr;
+  return internal::EnvironmentManager::get_instance().get(name);
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From 1ded636632a22df2ee3e11be6f9e349c4489888d Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Wed, 6 May 2026 08:18:18 +0100
Subject: [PATCH 2/2] Update libc/src/stdlib/environ_internal.cpp

Co-authored-by: Michael Jones <michaelrj at google.com>
---
 libc/src/stdlib/environ_internal.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/stdlib/environ_internal.cpp b/libc/src/stdlib/environ_internal.cpp
index 0dd39293d652e..63cda23f578ba 100644
--- a/libc/src/stdlib/environ_internal.cpp
+++ b/libc/src/stdlib/environ_internal.cpp
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/stdlib/environ_internal.h"
-#include "config/app.h"
+#include "src/stdlib/config/app.h"
 #include "src/__support/CPP/new.h"
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/alloc-checker.h"



More information about the libc-commits mailing list