[libcxx-commits] [libcxx] a16f81f - Revert "[ASan][libc++] Turn on ASan annotations for short strings (#79049)"

Thurston Dang via libcxx-commits libcxx-commits at lists.llvm.org
Tue Jan 23 14:57:41 PST 2024


Author: Thurston Dang
Date: 2024-01-23T22:56:22Z
New Revision: a16f81f5e3313e88f96de35e5edfe8bee463d308

URL: https://github.com/llvm/llvm-project/commit/a16f81f5e3313e88f96de35e5edfe8bee463d308
DIFF: https://github.com/llvm/llvm-project/commit/a16f81f5e3313e88f96de35e5edfe8bee463d308.diff

LOG: Revert "[ASan][libc++] Turn on ASan annotations for short strings (#79049)"

This reverts commit cb528ec5e6331ce207c7b835d7ab963bd5e13af7.

Reason: buildbot breakage (https://lab.llvm.org/buildbot/#/builders/5/builds/40364):
SUMMARY: AddressSanitizer: container-overflow /b/sanitizer-x86_64-linux-fast/build/libcxx_build_asan_ubsan/include/c++/v1/string:1870:29 in __get_long_pointer

Added: 
    

Modified: 
    libcxx/include/string
    libcxx/test/support/asan_testing.h

Removed: 
    libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
    libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
    libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp


################################################################################
diff  --git a/libcxx/include/string b/libcxx/include/string
index 4116f350a804764..e97139206d4fa7c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -659,6 +659,7 @@ _LIBCPP_PUSH_MACROS
 #else
 #  define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
 #endif
+#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
@@ -1895,17 +1896,22 @@ private:
 #endif
   }
 
+  // ASan: short string is poisoned if and only if this function returns true.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
+    return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
+  }
+
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
     (void) __current_size;
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
-    if (!__libcpp_is_constant_evaluated())
+    if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
       __annotate_contiguous_container(data() + capacity() + 1, data() + __current_size + 1);
 #endif
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_delete() const _NOEXCEPT {
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
-    if (!__libcpp_is_constant_evaluated())
+    if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
       __annotate_contiguous_container(data() + size() + 1, data() + capacity() + 1);
 #endif
   }
@@ -1913,7 +1919,7 @@ private:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_increase(size_type __n) const _NOEXCEPT {
     (void) __n;
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
-    if (!__libcpp_is_constant_evaluated())
+    if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
       __annotate_contiguous_container(data() + size() + 1, data() + size() + 1 + __n);
 #endif
   }
@@ -1921,7 +1927,7 @@ private:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_shrink(size_type __old_size) const _NOEXCEPT {
     (void) __old_size;
 #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
-    if (!__libcpp_is_constant_evaluated())
+    if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
       __annotate_contiguous_container(data() + __old_size + 1, data() + size() + 1);
 #endif
   }

diff  --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
deleted file mode 100644
index 1205190b3a6e131..000000000000000
--- a/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// REQUIRES: asan
-// UNSUPPORTED: c++03
-
-#include <cassert>
-#include <string>
-#include <array>
-#include <deque>
-#include "test_macros.h"
-#include "asan_testing.h"
-#include "min_allocator.h"
-
-// This tests exists to check if strings work well with deque, as those
-// may be partialy annotated, we cannot simply call
-// is_double_ended_contiguous_container_asan_correct, as it assumes that
-// object memory inside is not annotated, so we check everything in a more careful way.
-
-template <typename D>
-void verify_inside(D const& d) {
-  for (size_t i = 0; i < d.size(); ++i) {
-    assert(is_string_asan_correct(d[i]));
-  }
-}
-
-template <typename S, size_t N>
-S get_s(char c) {
-  S s;
-  for (size_t i = 0; i < N; ++i)
-    s.push_back(c);
-
-  return s;
-}
-
-template <class C, class S>
-void test_string() {
-  size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
-
-  {
-    C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N);
-    verify_inside(d1a);
-    verify_inside(d1b);
-    verify_inside(d1c);
-    verify_inside(d1d);
-  }
-  {
-    C d2;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      d2.push_back(get_s<S, 1>(i % 10 + 'a'));
-      verify_inside(d2);
-      d2.push_back(get_s<S, 22>(i % 10 + 'b'));
-      verify_inside(d2);
-
-      d2.pop_front();
-      verify_inside(d2);
-    }
-  }
-  {
-    C d3;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      d3.push_front(get_s<S, 1>(i % 10 + 'a'));
-      verify_inside(d3);
-      d3.push_front(get_s<S, 28>(i % 10 + 'b'));
-      verify_inside(d3);
-
-      d3.pop_back();
-      verify_inside(d3);
-    }
-  }
-  {
-    C d4;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      // When there is no SSO, all elements inside should not be poisoned,
-      // so we can verify deque poisoning.
-      d4.push_front(get_s<S, 33>(i % 10 + 'a'));
-      verify_inside(d4);
-      assert(is_double_ended_contiguous_container_asan_correct(d4));
-      d4.push_back(get_s<S, 28>(i % 10 + 'b'));
-      verify_inside(d4);
-      assert(is_double_ended_contiguous_container_asan_correct(d4));
-    }
-  }
-  {
-    C d5;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      // In d4 we never had poisoned memory inside deque.
-      // Here we start with SSO, so part of the inside of the container,
-      // will be poisoned.
-      d5.push_front(S());
-      verify_inside(d5);
-    }
-    for (size_t i = 0; i < d5.size(); ++i) {
-      // We change the size to have long string.
-      // Memory owne by deque should not be poisoned by string.
-      d5[i].resize(100);
-      verify_inside(d5);
-    }
-
-    assert(is_double_ended_contiguous_container_asan_correct(d5));
-
-    d5.erase(d5.begin() + 2);
-    verify_inside(d5);
-
-    d5.erase(d5.end() - 2);
-    verify_inside(d5);
-
-    assert(is_double_ended_contiguous_container_asan_correct(d5));
-  }
-  {
-    C d6a;
-    assert(is_double_ended_contiguous_container_asan_correct(d6a));
-
-    C d6b(N + 2, get_s<S, 100>('a'));
-    d6b.push_front(get_s<S, 101>('b'));
-    while (!d6b.empty()) {
-      d6b.pop_back();
-      assert(is_double_ended_contiguous_container_asan_correct(d6b));
-    }
-
-    C d6c(N + 2, get_s<S, 102>('c'));
-    while (!d6c.empty()) {
-      d6c.pop_back();
-      assert(is_double_ended_contiguous_container_asan_correct(d6c));
-    }
-  }
-  {
-    C d7(9 * N + 2);
-
-    d7.insert(d7.begin() + 1, S());
-    verify_inside(d7);
-
-    d7.insert(d7.end() - 3, S());
-    verify_inside(d7);
-
-    d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
-    verify_inside(d7);
-
-    d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
-    verify_inside(d7);
-
-    d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
-    verify_inside(d7);
-
-    // It may not be short for big element types, but it will be checked correctly:
-    d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
-    verify_inside(d7);
-
-    d7.erase(d7.begin() + 2);
-    verify_inside(d7);
-
-    d7.erase(d7.end() - 2);
-    verify_inside(d7);
-  }
-}
-
-template <class S>
-void test_container() {
-  test_string<std::deque<S, std::allocator<S>>, S>();
-  test_string<std::deque<S, min_allocator<S>>, S>();
-  test_string<std::deque<S, safe_allocator<S>>, S>();
-}
-
-int main(int, char**) {
-  // Those tests support only types based on std::basic_string.
-  test_container<std::string>();
-  test_container<std::wstring>();
-#if TEST_STD_VER >= 11
-  test_container<std::u16string>();
-  test_container<std::u32string>();
-#endif
-#if TEST_STD_VER >= 20
-  test_container<std::u8string>();
-#endif
-
-  return 0;
-}

diff  --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
deleted file mode 100644
index 53c70bed189b5c1..000000000000000
--- a/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// REQUIRES: asan
-// UNSUPPORTED: c++03
-
-// <string>
-
-// Basic test if ASan annotations work for short strings.
-
-#include <string>
-#include <cassert>
-#include <cstdlib>
-
-#include "asan_testing.h"
-#include "min_allocator.h"
-#include "test_iterators.h"
-#include "test_macros.h"
-
-extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
-
-void do_exit() { exit(0); }
-
-int main(int, char**) {
-  {
-    typedef cpp17_input_iterator<char*> MyInputIter;
-    // Should not trigger ASan.
-    std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
-    char i[] = {'a', 'b', 'c', 'd'};
-
-    v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
-    assert(v[0] == 'a');
-    assert(is_string_asan_correct(v));
-  }
-
-  __sanitizer_set_death_callback(do_exit);
-  {
-    using T     = char;
-    using C     = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
-    const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
-    C c(std::begin(t), std::end(t));
-    assert(is_string_asan_correct(c));
-    assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
-           0);
-    volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
-    assert(false);                    // if we got here, ASAN didn't trigger
-    ((void)foo);
-  }
-
-  return 0;
-}

diff  --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
deleted file mode 100644
index b7d95b7069083ae..000000000000000
--- a/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// REQUIRES: asan
-// UNSUPPORTED: c++03
-
-#include <cassert>
-#include <string>
-#include <vector>
-#include <array>
-#include "test_macros.h"
-#include "asan_testing.h"
-#include "min_allocator.h"
-
-// This tests exists to check if strings work well with vector, as those
-// may be partialy annotated, we cannot simply call
-// is_contiguous_container_asan_correct, as it assumes that
-// object memory inside is not annotated, so we check everything in a more careful way.
-
-template <typename D>
-void verify_inside(D const& d) {
-  for (size_t i = 0; i < d.size(); ++i) {
-    assert(is_string_asan_correct(d[i]));
-  }
-}
-
-template <typename S, size_t N>
-S get_s(char c) {
-  S s;
-  for (size_t i = 0; i < N; ++i)
-    s.push_back(c);
-
-  return s;
-}
-
-template <class C, class S>
-void test_string() {
-  size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
-
-  {
-    C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N);
-    verify_inside(d1a);
-    verify_inside(d1b);
-    verify_inside(d1c);
-    verify_inside(d1d);
-  }
-  {
-    C d2;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      d2.push_back(get_s<S, 1>(i % 10 + 'a'));
-      verify_inside(d2);
-      d2.push_back(get_s<S, 28>(i % 10 + 'b'));
-      verify_inside(d2);
-
-      d2.erase(d2.cbegin());
-      verify_inside(d2);
-    }
-  }
-  {
-    C d3;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      d3.push_back(get_s<S, 1>(i % 10 + 'a'));
-      verify_inside(d3);
-      d3.push_back(get_s<S, 28>(i % 10 + 'b'));
-      verify_inside(d3);
-
-      d3.pop_back();
-      verify_inside(d3);
-    }
-  }
-  {
-    C d4;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      // When there is no SSO, all elements inside should not be poisoned,
-      // so we can verify vector poisoning.
-      d4.push_back(get_s<S, 33>(i % 10 + 'a'));
-      verify_inside(d4);
-      assert(is_contiguous_container_asan_correct(d4));
-      d4.push_back(get_s<S, 28>(i % 10 + 'b'));
-      verify_inside(d4);
-      assert(is_contiguous_container_asan_correct(d4));
-    }
-  }
-  {
-    C d5;
-    for (size_t i = 0; i < 3 * N + 2; ++i) {
-      // In d4 we never had poisoned memory inside vector.
-      // Here we start with SSO, so part of the inside of the container,
-      // will be poisoned.
-      d5.push_back(S());
-      verify_inside(d5);
-    }
-    for (size_t i = 0; i < d5.size(); ++i) {
-      // We change the size to have long string.
-      // Memory owne by vector should not be poisoned by string.
-      d5[i].resize(100);
-      verify_inside(d5);
-    }
-
-    assert(is_contiguous_container_asan_correct(d5));
-
-    d5.erase(d5.begin() + 2);
-    verify_inside(d5);
-
-    d5.erase(d5.end() - 2);
-    verify_inside(d5);
-
-    assert(is_contiguous_container_asan_correct(d5));
-  }
-  {
-    C d6a;
-    assert(is_contiguous_container_asan_correct(d6a));
-
-    C d6b(N + 2, get_s<S, 100>('a'));
-    d6b.push_back(get_s<S, 101>('b'));
-    while (!d6b.empty()) {
-      d6b.pop_back();
-      assert(is_contiguous_container_asan_correct(d6b));
-    }
-
-    C d6c(N + 2, get_s<S, 102>('c'));
-    while (!d6c.empty()) {
-      d6c.pop_back();
-      assert(is_contiguous_container_asan_correct(d6c));
-    }
-  }
-  {
-    C d7(9 * N + 2);
-
-    d7.insert(d7.begin() + 1, S());
-    verify_inside(d7);
-
-    d7.insert(d7.end() - 3, S());
-    verify_inside(d7);
-
-    d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
-    verify_inside(d7);
-
-    d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
-    verify_inside(d7);
-
-    d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
-    verify_inside(d7);
-
-    // It may not be short for big element types, but it will be checked correctly:
-    d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
-    verify_inside(d7);
-
-    d7.erase(d7.begin() + 2);
-    verify_inside(d7);
-
-    d7.erase(d7.end() - 2);
-    verify_inside(d7);
-  }
-}
-
-template <class S>
-void test_container() {
-  test_string<std::vector<S, std::allocator<S>>, S>();
-  test_string<std::vector<S, min_allocator<S>>, S>();
-  test_string<std::vector<S, safe_allocator<S>>, S>();
-}
-
-int main(int, char**) {
-  // Those tests support only types based on std::basic_string.
-  test_container<std::string>();
-  test_container<std::wstring>();
-#if TEST_STD_VER >= 11
-  test_container<std::u16string>();
-  test_container<std::u32string>();
-#endif
-#if TEST_STD_VER >= 20
-  test_container<std::u8string>();
-#endif
-
-  return 0;
-}

diff  --git a/libcxx/test/support/asan_testing.h b/libcxx/test/support/asan_testing.h
index 3785c1f9c20dea1..6bfc8280a4ead30 100644
--- a/libcxx/test/support/asan_testing.h
+++ b/libcxx/test/support/asan_testing.h
@@ -56,16 +56,35 @@ TEST_CONSTEXPR bool is_double_ended_contiguous_container_asan_correct(const std:
 #endif
 
 #if TEST_HAS_FEATURE(address_sanitizer)
+template <typename S>
+bool is_string_short(S const& s) {
+  // We do not have access to __is_long(), but we can check if strings
+  // buffer is inside strings memory. If strings memory contains its content,
+  // SSO is in use. To check it, we can just confirm that the beginning is in
+  // the string object memory block.
+  // &s    - beginning of objects memory
+  // &s[0] - beginning of the buffer
+  // (&s+1) - end of objects memory
+  return (void*)std::addressof(s) <= (void*)std::addressof(s[0]) &&
+         (void*)std::addressof(s[0]) < (void*)(std::addressof(s) + 1);
+}
+
 template <typename ChrT, typename TraitsT, typename Alloc>
 TEST_CONSTEXPR bool is_string_asan_correct(const std::basic_string<ChrT, TraitsT, Alloc>& c) {
   if (TEST_IS_CONSTANT_EVALUATED)
     return true;
 
-  if (std::__asan_annotate_container_with_allocator<Alloc>::value)
-    return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != 0;
-  else
-    return __sanitizer_verify_contiguous_container(
-               c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
+  if (!is_string_short(c) || _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED) {
+    if (std::__asan_annotate_container_with_allocator<Alloc>::value)
+      return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
+             0;
+    else
+      return __sanitizer_verify_contiguous_container(
+                 c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
+  } else {
+    return __sanitizer_verify_contiguous_container(std::addressof(c), std::addressof(c) + 1, std::addressof(c) + 1) !=
+           0;
+  }
 }
 #else
 #  include <string>


        


More information about the libcxx-commits mailing list