[libcxx-commits] [libcxx] [ASan][libc++] Annotating `std::basic_string` with all allocators (PR #75845)

via libcxx-commits libcxx-commits at lists.llvm.org
Mon Dec 18 17:48:27 PST 2023


https://github.com/AdvenamTacet updated https://github.com/llvm/llvm-project/pull/75845

>From d297670ad62f75d64c8fbef867d2e487390a4a1d Mon Sep 17 00:00:00 2001
From: Advenam Tacet <advenam.tacet at trailofbits.com>
Date: Mon, 18 Dec 2023 20:34:49 +0100
Subject: [PATCH] [ASan][libc++] Annotating `std::basic_string` with all
 allocators

This commit turns on ASan annotations in `std::basic_string` for all allocators by default.

Originally suggested here: https://reviews.llvm.org/D146214

This commit is part of our efforts to support container annotations with (almost) every allocator.
Annotating `std::basic_string` with the default allocator is implemented in https://github.com/llvm/llvm-project/pull/72677.

Support in the ASan API has existed since https://github.com/llvm/llvm-project/commit/dd1b7b797a116eed588fd752fbe61d34deeb24e4.
This patch removes the default-allocator check in the `std::basic_string` annotation member function (`__annotate_contiguous_container`) to support different allocators.

You can turn off annotations for a specific allocator based on changes from https://github.com/llvm/llvm-project/commit/2fa1bec7a20bb23f2e6620085adb257dafaa3be0.

This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds.

The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equal` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability.

If you have any questions, please email:
- advenam.tacet at trailofbits.com
- disconnect3d at trailofbits.com
---
 libcxx/include/string                         |  3 +-
 .../strings/basic.string/asan.pass.cpp        | 54 +++++++++++++
 .../basic.string/asan_turning_off.pass.cpp    | 78 +++++++++++++++++++
 libcxx/test/support/asan_testing.h            |  2 +-
 4 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
 create mode 100644 libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp

diff --git a/libcxx/include/string b/libcxx/include/string
index fdffca5aed18be..ce2df8da8a91eb 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1891,8 +1891,7 @@ private:
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
     const void* __begin = data();
     const void* __end   = data() + capacity() + 1;
-    if (!__libcpp_is_constant_evaluated() && __begin != nullptr &&
-        is_same<allocator_type, __default_allocator_type>::value)
+    if (!__libcpp_is_constant_evaluated() && __asan_annotate_container_with_allocator<allocator_type>::value)
       __sanitizer_annotate_contiguous_container(__begin, __end, __old_mid, __new_mid);
 #endif
   }
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
new file mode 100644
index 00000000000000..b578b0fadffcde
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan.pass.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+// Basic test if ASan annotations work for basic_string.
+
+#include <string>
+#include <cassert>
+#include <cstdlib>
+
+#include "asan_testing.h"
+#include "min_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
+
+void do_exit() { exit(0); }
+
+int main(int, char**) {
+  {
+    typedef cpp17_input_iterator<char*> MyInputIter;
+    // Should not trigger ASan.
+    std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
+    char i[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e',
+                'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
+
+    v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 29));
+    assert(v[0] == 'a');
+    assert(is_string_asan_correct(v));
+  }
+
+  __sanitizer_set_death_callback(do_exit);
+  {
+    using T     = char;
+    using C     = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
+    const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e',
+                   'f', 'g', 'h', 'i', 'j', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
+    C c(std::begin(t), std::end(t));
+    assert(is_string_asan_correct(c));
+    assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
+           0);
+    volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
+    assert(false);                    // if we got here, ASAN didn't trigger
+    ((void)foo);
+  }
+}
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
new file mode 100644
index 00000000000000..4e12e3c86248d9
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_turning_off.pass.cpp
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+// <string>
+
+// Test based on: https://bugs.chromium.org/p/chromium/issues/detail?id=1419798#c5
+// Some allocators during deallocation may not call destructors and just reuse memory.
+// In those situations, one may want to deactivate annotations for a specific allocator.
+// It's possible with __asan_annotate_container_with_allocator template class.
+// This test confirms that those allocators work after turning off annotations.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <new>
+
+struct reuse_allocator {
+  static size_t const N = 100;
+  reuse_allocator() {
+    for (size_t i = 0; i < N; ++i)
+      __buffers[i] = malloc(8 * 1024);
+  }
+  ~reuse_allocator() {
+    for (size_t i = 0; i < N; ++i)
+      free(__buffers[i]);
+  }
+  void* alloc() {
+    assert(__next_id < N);
+    return __buffers[__next_id++];
+  }
+  void reset() { __next_id = 0; }
+  void* __buffers[N];
+  size_t __next_id = 0;
+} reuse_buffers;
+
+template <typename T>
+struct user_allocator {
+  using value_type = T;
+  user_allocator() = default;
+  template <class U>
+  user_allocator(user_allocator<U>) {}
+  friend bool operator==(user_allocator, user_allocator) { return true; }
+  friend bool operator!=(user_allocator x, user_allocator y) { return !(x == y); }
+
+  T* allocate(size_t) { return (T*)reuse_buffers.alloc(); }
+  void deallocate(T*, size_t) noexcept {}
+};
+
+template <class T>
+struct std::__asan_annotate_container_with_allocator<user_allocator<T>> {
+  static bool const value = false;
+};
+
+int main() {
+  using S = std::basic_string<char, std::char_traits<char>, user_allocator<char>>;
+
+  {
+    S* s = new (reuse_buffers.alloc()) S();
+    for (int i = 0; i < 100; i++)
+      s->push_back('a');
+  }
+  reuse_buffers.reset();
+  {
+    S s;
+    for (int i = 0; i < 1000; i++)
+      s.push_back('b');
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/support/asan_testing.h b/libcxx/test/support/asan_testing.h
index 2dfec5c42b00b2..6bfc8280a4ead3 100644
--- a/libcxx/test/support/asan_testing.h
+++ b/libcxx/test/support/asan_testing.h
@@ -75,7 +75,7 @@ TEST_CONSTEXPR bool is_string_asan_correct(const std::basic_string<ChrT, TraitsT
     return true;
 
   if (!is_string_short(c) || _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED) {
-    if (std::is_same<Alloc, std::allocator<ChrT>>::value)
+    if (std::__asan_annotate_container_with_allocator<Alloc>::value)
       return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
              0;
     else



More information about the libcxx-commits mailing list