[libcxx-commits] [libcxx] [ASan][libc++] Annotating `std::basic_string` with all allocators (PR #75845)

via libcxx-commits libcxx-commits at lists.llvm.org
Wed Dec 27 01:04:41 PST 2023


https://github.com/AdvenamTacet updated https://github.com/llvm/llvm-project/pull/75845

>From d297670ad62f75d64c8fbef867d2e487390a4a1d Mon Sep 17 00:00:00 2001
From: Advenam Tacet <advenam.tacet at trailofbits.com>
Date: Mon, 18 Dec 2023 20:34:49 +0100
Subject: [PATCH 1/2] [ASan][libc++] Annotating `std::basic_string` with all
 allocators

This commit turns on ASan annotations in `std::basic_string` for all allocators by default.

Originally suggested here: https://reviews.llvm.org/D146214

This commit is part of our efforts to support container annotations with (almost) every allocator.
Annotating `std::basic_string` with default allocator is implemented in https://github.com/llvm/llvm-project/pull/72677.

Support in the ASan API has existed since https://github.com/llvm/llvm-project/commit/dd1b7b797a116eed588fd752fbe61d34deeb24e4.
This patch removes the check in std::basic_string annotation member function (__annotate_contiguous_container) to support different allocators.

You can turn off annotations for a specific allocator based on changes from https://github.com/llvm/llvm-project/commit/2fa1bec7a20bb23f2e6620085adb257dafaa3be0.

This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds.

The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equal` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability.

If you have any questions, please email:
- advenam.tacet at trailofbits.com
- disconnect3d at trailofbits.com
---
 libcxx/include/string                         |  3 +-
 .../strings/basic.string/asan.pass.cpp        | 54 +++++++++++++
 .../basic.string/asan_turning_off.pass.cpp    | 78 +++++++++++++++++++
 libcxx/test/support/asan_testing.h            |  2 +-
 4 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp

diff --git a/libcxx/include/string b/libcxx/include/string
index fdffca5aed18be..ce2df8da8a91eb 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1891,8 +1891,7 @@ private:
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
     const void* __begin = data();
     const void* __end   = data() + capacity() + 1;
-    if (!__libcpp_is_constant_evaluated() && __begin != nullptr &&
-        is_same<allocator_type, __default_allocator_type>::value)
+    if (!__libcpp_is_constant_evaluated() && __asan_annotate_container_with_allocator<allocator_type>::value)
       __sanitizer_annotate_contiguous_container(__begin, __end, __old_mid, __new_mid);
 #endif
   }
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
new file mode 100644
index 00000000000000..b578b0fadffcde
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+// Basic test if ASan annotations work for basic_string.
+
+#include <string>
+#include <cassert>
+#include <cstdlib>
+
+#include "asan_testing.h"
+#include "min_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
+
+void do_exit() { exit(0); }
+
+int main(int, char**) {
+  {
+    typedef cpp17_input_iterator<char*> MyInputIter;
+    // Should not trigger ASan.
+    std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
+    char i[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e',
+                'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
+
+    v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 29));
+    assert(v[0] == 'a');
+    assert(is_string_asan_correct(v));
+  }
+
+  __sanitizer_set_death_callback(do_exit);
+  {
+    using T     = char;
+    using C     = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
+    const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e',
+                   'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
+    C c(std::begin(t), std::end(t));
+    assert(is_string_asan_correct(c));
+    assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
+           0);
+    volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
+    assert(false);                    // if we got here, ASAN didn't trigger
+    ((void)foo);
+  }
+}
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
new file mode 100644
index 00000000000000..4e12e3c86248d9
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+// <string>
+
+// Test based on: https://bugs.chromium.org/p/chromium/issues/detail?id=1419798#c5
+// Some allocators during deallocation may not call destructors and just reuse memory.
+// In those situations, one may want to deactivate annotations for a specific allocator.
+// It's possible with __asan_annotate_container_with_allocator template class.
+// This test confirms that those allocators work after turning off annotations.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <new>
+
+struct reuse_allocator {
+  static size_t const N = 100;
+  reuse_allocator() {
+    for (size_t i = 0; i < N; ++i)
+      __buffers[i] = malloc(8 * 1024);
+  }
+  ~reuse_allocator() {
+    for (size_t i = 0; i < N; ++i)
+      free(__buffers[i]);
+  }
+  void* alloc() {
+    assert(__next_id < N);
+    return __buffers[__next_id++];
+  }
+  void reset() { __next_id = 0; }
+  void* __buffers[N];
+  size_t __next_id = 0;
+} reuse_buffers;
+
+template <typename T>
+struct user_allocator {
+  using value_type = T;
+  user_allocator() = default;
+  template <class U>
+  user_allocator(user_allocator<U>) {}
+  friend bool operator==(user_allocator, user_allocator) { return true; }
+  friend bool operator!=(user_allocator x, user_allocator y) { return !(x == y); }
+
+  T* allocate(size_t) { return (T*)reuse_buffers.alloc(); }
+  void deallocate(T*, size_t) noexcept {}
+};
+
+template <class T>
+struct std::__asan_annotate_container_with_allocator<user_allocator<T>> {
+  static bool const value = false;
+};
+
+int main() {
+  using S = std::basic_string<char, std::char_traits<char>, user_allocator<char>>;
+
+  {
+    S* s = new (reuse_buffers.alloc()) S();
+    for (int i = 0; i < 100; i++)
+      s->push_back('a');
+  }
+  reuse_buffers.reset();
+  {
+    S s;
+    for (int i = 0; i < 1000; i++)
+      s.push_back('b');
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/support/asan_testing.h b/libcxx/test/support/asan_testing.h
index 2dfec5c42b00b2..6bfc8280a4ead3 100644
--- a/libcxx/test/support/asan_testing.h
+++ b/libcxx/test/support/asan_testing.h
@@ -75,7 +75,7 @@ TEST_CONSTEXPR bool is_string_asan_correct(const std::basic_string<ChrT, TraitsT
     return true;
 
   if (!is_string_short(c) || _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED) {
-    if (std::is_same<Alloc, std::allocator<ChrT>>::value)
+    if (std::__asan_annotate_container_with_allocator<Alloc>::value)
       return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
              0;
     else

>From b606f9ee8da6f9975192ec80156e64a84ea5c854 Mon Sep 17 00:00:00 2001
From: Advenam Tacet <advenam.tacet at trailofbits.com>
Date: Wed, 27 Dec 2023 10:54:36 +0100
Subject: [PATCH 2/2] Code review from EricWF

This commit addresses some comments from a code review:
- add size check,
- add comments with context to a test,
- add comments explaining what happens in the test,
- remove volatile,
- split an if to constexpr if and normal if.
---
 libcxx/include/string                         |  5 +--
 .../strings/basic.string/asan.pass.cpp        |  4 +--
 .../basic.string/asan_turning_off.pass.cpp    | 32 ++++++++++++++++---
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index ce2df8da8a91eb..27e93ac0e5e044 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1891,8 +1891,9 @@ private:
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
     const void* __begin = data();
     const void* __end   = data() + capacity() + 1;
-    if (!__libcpp_is_constant_evaluated() && __asan_annotate_container_with_allocator<allocator_type>::value)
-      __sanitizer_annotate_contiguous_container(__begin, __end, __old_mid, __new_mid);
+    if _LIBCPP_CONSTEXPR_SINCE_CXX17 (__asan_annotate_container_with_allocator<allocator_type>::value)
+      if(!__libcpp_is_constant_evaluated())
+        __sanitizer_annotate_contiguous_container(__begin, __end, __old_mid, __new_mid);
 #endif
   }
 
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
index b578b0fadffcde..88570a0e7daa76 100644
--- a/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
@@ -47,8 +47,8 @@ int main(int, char**) {
     assert(is_string_asan_correct(c));
     assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
            0);
-    volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
-    assert(false);                    // if we got here, ASAN didn't trigger
+    T foo = c[c.size() + 1]; // should trigger ASAN and call do_exit().
+    assert(false);           // if we got here, ASAN didn't trigger
     ((void)foo);
   }
 }
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
index 4e12e3c86248d9..fd1785b3a4d584 100644
--- a/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
@@ -16,12 +16,26 @@
 // In those situations, one may want to deactivate annotations for a specific allocator.
 // It's possible with __asan_annotate_container_with_allocator template class.
 // This test confirms that those allocators work after turning off annotations.
+//
+// The context for this test is a situation in which memory is repurposed and destructors are not called.
+//   Related issue: https://github.com/llvm/llvm-project/issues/60384
+//
+// That issue appeared in the past and was addressed here: https://reviews.llvm.org/D145628
+//
+// There is also the question of whether this is UB; the answer is not clear.
+//   Related discussion: https://reviews.llvm.org/D136765#4155262
+//   Related notes: https://eel.is/c++draft/basic.life#6
+//
+// Even if it is UB, we want to make sure that it works that way, because people rely on this behavior.
+// In such situations, a user explicitly turns off annotations for a specific allocator.
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string>
 #include <new>
 
+// Allocator with pre-allocated (with malloc in constructor) buffers.
+// Memory may be freed without calling destructors.
 struct reuse_allocator {
   static size_t const N = 100;
   reuse_allocator() {
@@ -50,10 +64,15 @@ struct user_allocator {
   friend bool operator==(user_allocator, user_allocator) { return true; }
   friend bool operator!=(user_allocator x, user_allocator y) { return !(x == y); }
 
-  T* allocate(size_t) { return (T*)reuse_buffers.alloc(); }
+  T* allocate(size_t n) {
+    if (n * sizeof(T) > 8 * 1024)
+      throw std::bad_array_new_length();
+    return (T*)reuse_buffers.alloc();
+  }
   void deallocate(T*, size_t) noexcept {}
 };
 
+// Turn off annotations for user_allocator:
 template <class T>
 struct std::__asan_annotate_container_with_allocator<user_allocator<T>> {
   static bool const value = false;
@@ -63,15 +82,20 @@ int main() {
   using S = std::basic_string<char, std::char_traits<char>, user_allocator<char>>;
 
   {
+    // Create a string with a buffer from reuse allocator object:
     S* s = new (reuse_buffers.alloc()) S();
-    for (int i = 0; i < 100; i++)
+    // Use string, so it's poisoned, if container annotations for that allocator are not turned off:
+    for (int i = 0; i < 40; i++)
       s->push_back('a');
   }
+  // Reset the state of the allocator, don't call destructors, allow memory to be reused:
   reuse_buffers.reset();
   {
+    // Create a next string with the same allocator, so the same buffer due to the reset:
     S s;
-    for (int i = 0; i < 1000; i++)
-      s.push_back('b');
+    // Use memory inside the string again, if it's poisoned, an error will be raised:
+    for (int i = 0; i < 60; i++)
+      s.push_back('a');
   }
 
   return 0;



More information about the libcxx-commits mailing list