[Lldb-commits] [lldb] b6b6540 - [lldb] Add tests which simulate the various std::string layouts

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Thu Jun 30 23:12:42 PDT 2022


Author: Pavel Labath
Date: 2022-07-01T08:08:22+02:00
New Revision: b6b65403b3826af5404f874f32d7abd8ef8a8a96

URL: https://github.com/llvm/llvm-project/commit/b6b65403b3826af5404f874f32d7abd8ef8a8a96
DIFF: https://github.com/llvm/llvm-project/commit/b6b65403b3826af5404f874f32d7abd8ef8a8a96.diff

LOG: [lldb] Add tests which simulate the various std::string layouts

Checking that a formatter change does not break any of the supported
string layouts is difficult because it requires tracking down and/or
building different versions and build configurations of the library.

The purpose of this patch is to avoid that by providing an in-tree
simulation of the string class. It is a reduced version of the real
string class, obtained by eliminating all non-trivial code, leaving
just the internal data structures used by the data formatter. Different
versions of the class can be simulated through preprocessor defines.

The test (ab)uses the fact that our formatters kick in for any
double-underscore sub-namespace of `std`, so it avoids colliding with
the real string class by declaring the test class in the std::__lldb
namespace.

I do not consider this to be a replacement for the existing data
formatter tests, as producing this kind of a test is not trivial, and it
is easy to make a mistake in the process. However, it's also not
realistic to expect that every person changing the data formatter will
test it against all versions of the real class, so I think it can be
useful as a first line of defence.

Adding support for new layouts can become particularly unwieldy, but
this complexity will also be reflected in the actual code, so if we find
ourselves needing to support too many variants, we may need to start
dropping support for old ones, or come up with a completely different
strategy.

Differential Revision: https://reviews.llvm.org/D124155

Added: 
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/Makefile
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/TestDataFormatterLibcxxStringSimulator.py
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/main.cpp

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/Makefile
new file mode 100644
index 000000000000..38cfa8105348
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+override CXXFLAGS_EXTRAS += -std=c++14
+include Makefile.rules # NOTE(review): presumably the shared lldb test-suite rules that build CXX_SOURCES with CXXFLAGS_EXTRAS - confirm

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/TestDataFormatterLibcxxStringSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/TestDataFormatterLibcxxStringSimulator.py
new file mode 100644
index 000000000000..76cd64660aab
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/TestDataFormatterLibcxxStringSimulator.py
@@ -0,0 +1,45 @@
+"""
+Test that we can understand the various layouts of libc++'s std::string
+"""
+
+
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class LibcxxStringDataFormatterSimulatorTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)  # computed from this file's path
+    NO_DEBUG_INFO_TESTCASE = True  # NOTE(review): presumably stops the suite from repeating the test per debug-info format - confirm
+
+    def _run_test(self, defines):  # defines: list of macro names that select the simulated layout in main.cpp
+        cxxflags_extras = " ".join(["-D%s" % d for d in defines])  # one -D<name> flag per requested define
+        self.build(dictionary=dict(CXXFLAGS_EXTRAS=cxxflags_extras))  # hand the defines to the build as CXXFLAGS_EXTRAS
+        lldbutil.run_to_source_breakpoint(self, '// Break here',
+                lldb.SBFileSpec("main.cpp"))  # breakpoint is located by the '// Break here' marker in main.cpp
+        self.expect_var_path("shortstring", summary='"short"')  # string held in the short (in-object) representation
+        self.expect_var_path("longstring", summary='"I am a very long string"')  # string reached through the long representation's pointer
+
+    def test_v1_layout(self):
+        """ Current v1 layout. """
+        self._run_test([])  # no defines: main.cpp's !ALTERNATE_LAYOUT, !BITMASKS branches
+
+    def test_v2_layout(self):
+        """ Current v2 layout. """
+        self._run_test(["ALTERNATE_LAYOUT"])  # ALTERNATE_LAYOUT branch with bit-field flags
+
+    def test_v1_layout_bitmasks(self):
+        """ Pre-D123580 v1 layout. """
+        self._run_test(["BITMASKS"])  # default member order, flag folded in via mask/shift
+
+    def test_v2_layout_bitmasks(self):
+        """ Pre-D123580 v2 layout. """
+        self._run_test(["ALTERNATE_LAYOUT", "BITMASKS"])  # alternate member order, flag folded in via mask/shift
+
+    def test_v2_layout_subclass_padding(self):
+        """ Pre-c3d0205ee771 v2 layout. """
+        self._run_test(["ALTERNATE_LAYOUT", "BITMASKS", "SUBCLASS_PADDING"])  # as above, with padding coming from the __padding base struct
+

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/main.cpp
new file mode 100644
index 000000000000..4852dfd45668
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/simulator/main.cpp
@@ -0,0 +1,217 @@
+#include <climits>
+#include <memory>
+#include <type_traits>
+
+namespace std {
+namespace __lldb {
+
+template <class _Tp, int _Idx,
+          bool _CanBeEmptyBase =
+              std::is_empty<_Tp>::value && !std::is_final<_Tp>::value> // true only for an empty, non-final _Tp
+struct __compressed_pair_elem { // primary template: keeps the element as a data member
+  explicit __compressed_pair_elem(_Tp __t) : __value_(__t) {} // copies the argument into the member
+
+  _Tp &__get() { return __value_; } // access to the stored element
+
+private:
+  _Tp __value_; // the element itself
+};
+
+template <class _Tp, int _Idx>
+struct __compressed_pair_elem<_Tp, _Idx, true> : private _Tp { // _CanBeEmptyBase == true: derive from _Tp instead of storing a member
+  explicit __compressed_pair_elem(_Tp __t) : _Tp(__t) {} // copy-constructs the base subobject from the argument
+
+  _Tp &__get() { return *this; } // the element is the base-class subobject
+};
+
+template <class _T1, class _T2>
+class __compressed_pair : private __compressed_pair_elem<_T1, 0>, // index 0 holds the first element
+                          private __compressed_pair_elem<_T2, 1> { // index 1 holds the second element
+public:
+  using _Base1 = __compressed_pair_elem<_T1, 0>;
+  using _Base2 = __compressed_pair_elem<_T2, 1>;
+
+  explicit __compressed_pair(_T1 __t1, _T2 __t2) : _Base1(__t1), _Base2(__t2) {} // forwards each value to its element base
+
+  _T1 &first() { return static_cast<_Base1 &>(*this).__get(); } // only accessor this reduced class provides
+};
+
+#if defined(ALTERNATE_LAYOUT) && defined(SUBCLASS_PADDING)
+template <class _CharT, size_t = sizeof(_CharT)> struct __padding { // used as the base of __short's size struct below
+  unsigned char __xx[sizeof(_CharT) - 1]; // sizeof(_CharT) - 1 filler bytes
+};
+
+template <class _CharT> struct __padding<_CharT, 1> {}; // one-byte characters: no filler, empty struct
+#endif // ALTERNATE_LAYOUT && SUBCLASS_PADDING
+
+template <class _CharT, class _Traits, class _Allocator> class basic_string { // layout-only stand-in: data members plus two constructors that fill them
+public:
+  typedef _CharT value_type;
+  typedef _Allocator allocator_type;
+  typedef allocator_traits<allocator_type> __alloc_traits;
+  typedef typename __alloc_traits::size_type size_type;
+  typedef typename __alloc_traits::pointer pointer;
+
+#ifdef ALTERNATE_LAYOUT
+
+  struct __long { // long representation, alternate order: pointer, size, capacity
+    pointer __data_;
+    size_type __size_;
+#ifdef BITMASKS
+    size_type __cap_; // the long-string constructor ORs __long_mask into this word
+#else // !BITMASKS
+    size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; // every bit of the word except one
+    size_type __is_long_ : 1; // explicit flag bit, declared after the capacity
+#endif // BITMASKS
+  };
+
+  enum { // __min_cap is the element count of __short::__data_, never below 2
+    __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2
+                    ? (sizeof(__long) - 1) / sizeof(value_type)
+                    : 2
+  };
+
+  struct __short { // short representation, alternate order: characters first, size last
+    value_type __data_[__min_cap];
+#ifdef BITMASKS
+#ifdef SUBCLASS_PADDING
+    struct : __padding<value_type> { // padding comes from the __padding base struct
+      unsigned char __size_;
+    };
+#else // !SUBCLASS_PADDING
+    unsigned char __padding[sizeof(value_type) - 1]; // padding as a plain array member; zero elements for a one-byte value_type
+    unsigned char __size_;
+#endif // SUBCLASS_PADDING
+#else // !BITMASKS
+    unsigned char __size_ : 7;
+    unsigned char __is_long_ : 1; // explicit flag bit, declared after the size
+#endif // BITMASKS
+  };
+
+#ifdef BITMASKS
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  static const size_type __short_shift = 1; // the short-string constructor stores the size shifted left by this amount
+  static const size_type __long_mask = 0x1ul; // only the least significant bit set
+#else // not big-endian
+  static const size_type __short_shift = 0;
+  static const size_type __long_mask = ~(size_type(~0) >> 1); // only the most significant bit set
+#endif // big-endian
+#else // !BITMASKS
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  static const size_type __endian_factor = 2; // the long-string constructor divides the capacity by this before storing it
+#else // not big-endian
+  static const size_type __endian_factor = 1;
+#endif // big-endian
+#endif // BITMASKS
+
+#else // !ALTERNATE_LAYOUT
+
+  struct __long { // long representation, default order: capacity, size, pointer
+#ifdef BITMASKS
+    size_type __cap_; // the long-string constructor ORs __long_mask into this word
+#else // !BITMASKS
+    size_type __is_long_ : 1; // explicit flag bit, declared before the capacity
+    size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; // every bit of the word except one
+#endif // BITMASKS
+    size_type __size_;
+    pointer __data_;
+  };
+
+  enum { // __min_cap is the element count of __short::__data_, never below 2
+    __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2
+                    ? (sizeof(__long) - 1) / sizeof(value_type)
+                    : 2
+  };
+
+  struct __short { // short representation, default order: size first, characters after
+    union { // the size (and flag) share storage with one value_type element
+#ifdef BITMASKS
+      unsigned char __size_;
+#else // !BITMASKS
+      struct {
+        unsigned char __is_long_ : 1; // explicit flag bit, declared before the size
+        unsigned char __size_ : 7;
+      };
+#endif // BITMASKS
+      value_type __lx;
+    };
+    value_type __data_[__min_cap];
+  };
+
+#ifdef BITMASKS
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  static const size_type __short_shift = 0; // the short-string constructor stores the size shifted left by this amount
+  static const size_type __long_mask = ~(size_type(~0) >> 1); // only the most significant bit set
+#else // not big-endian
+  static const size_type __short_shift = 1;
+  static const size_type __long_mask = 0x1ul; // only the least significant bit set
+#endif // big-endian
+#else // !BITMASKS
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  static const size_type __endian_factor = 1; // the long-string constructor divides the capacity by this before storing it
+#else // not big-endian
+  static const size_type __endian_factor = 2;
+#endif // big-endian
+#endif // BITMASKS
+
+#endif // ALTERNATE_LAYOUT
+
+  union __ulx { // referenced in this class only through sizeof, to size __raw
+    __long __lx;
+    __short __lxx;
+  };
+
+  enum { __n_words = sizeof(__ulx) / sizeof(size_type) }; // number of size_type words in __raw
+
+  struct __raw { // the same storage viewed as an array of size_type words
+    size_type __words[__n_words];
+  };
+
+  struct __rep { // long, short and raw views overlap in one anonymous union
+    union {
+      __long __l;
+      __short __s;
+      __raw __r;
+    };
+  };
+
+  __compressed_pair<__rep, allocator_type> __r_; // representation paired with the allocator; first() yields the __rep
+
+public:
+  template <size_t __N> // short-string constructor: fills the __s view from a character array
+  basic_string(unsigned char __size, const value_type (&__data)[__N])
+      : __r_({}, {}) { // starts from a value-initialized __rep and allocator
+    static_assert(__N < __min_cap, ""); // the array length must be below the short buffer's element count
+#ifdef BITMASKS
+    __r_.first().__s.__size_ = __size << __short_shift; // a shift of 1 leaves the low bit clear; the long flag is never set here
+#else // !BITMASKS
+    __r_.first().__s.__size_ = __size;
+    __r_.first().__s.__is_long_ = false; // explicitly marks the string as short
+#endif // BITMASKS
+    for (size_t __i = 0; __i < __N; ++__i) // copies all __N array elements
+      __r_.first().__s.__data_[__i] = __data[__i];
+  }
+  basic_string(size_t __cap, size_type __size, pointer __data) : __r_({}, {}) { // long-string constructor: fills the __l view
+#ifdef BITMASKS
+    __r_.first().__l.__cap_ = __cap | __long_mask; // the long flag is folded into the capacity word
+#else // !BITMASKS
+    __r_.first().__l.__cap_ = __cap / __endian_factor; // capacity is stored scaled down by __endian_factor
+    __r_.first().__l.__is_long_ = true; // explicitly marks the string as long
+#endif // BITMASKS
+    __r_.first().__l.__size_ = __size;
+    __r_.first().__l.__data_ = __data; // stores the pointer as given; no characters are copied
+  }
+};
+
+using string = basic_string<char, std::char_traits<char>, std::allocator<char>>; // the char instantiation that main() constructs
+
+} // namespace __lldb
+} // namespace std
+
+int main() {
+  char longdata[] = "I am a very long string"; // buffer the long string points at
+  std::__lldb::string longstring(sizeof(longdata), sizeof(longdata) - 1, // capacity counts the terminating NUL, size does not
+                                 longdata);
+  std::__lldb::string shortstring(5, "short"); // size 5; all 6 elements of the literal are copied into the short buffer
+  return 0; // Break here
+}


        


More information about the lldb-commits mailing list