[Lldb-commits] [lldb] b6087ba - Disable LLDB index cache for .o files with no UUID.

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Tue Apr 5 15:14:43 PDT 2022


Author: Greg Clayton
Date: 2022-04-05T15:14:36-07:00
New Revision: b6087ba769c612c031b84e6673c438fe44a46c6a

URL: https://github.com/llvm/llvm-project/commit/b6087ba769c612c031b84e6673c438fe44a46c6a
DIFF: https://github.com/llvm/llvm-project/commit/b6087ba769c612c031b84e6673c438fe44a46c6a.diff

LOG: Disable LLDB index cache for .o files with no UUID.

After enabling the LLDB index cache in production we discovered that some distributed build systems play with the modification times of any .o files that were downloaded from the build cache. This was causing the LLDB index cache to read the wrong cache file for files that didn't have a UUID as all of the modfication times were set to the same value by the build system. When new .o files were downloaded, the only unique identifier was the mod time which were all the same, and we would load an older cache for the updated .o file. So disabling caching of files that have no UUIDs for now until we can create a more solid solution.

Differential Revision: https://reviews.llvm.org/D120948

Added: 
    

Modified: 
    lldb/include/lldb/Core/DataFileCache.h
    lldb/source/Core/DataFileCache.cpp
    lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py
    lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Core/DataFileCache.h b/lldb/include/lldb/Core/DataFileCache.h
index 6f7de679f8679..cf794078ff9e3 100644
--- a/lldb/include/lldb/Core/DataFileCache.h
+++ b/lldb/include/lldb/Core/DataFileCache.h
@@ -119,22 +119,22 @@ struct CacheSignature {
     m_obj_mod_time = llvm::None;
   }
 
-  /// Return true if any of the signature member variables have valid values.
-  bool IsValid() const {
-    return m_uuid.hasValue() || m_mod_time.hasValue() ||
-           m_obj_mod_time.hasValue();
-  }
+  /// Return true only if the CacheSignature is valid.
+  ///
+  /// Cache signatures are considered valid only if there is a UUID in the file
+  /// that can uniquely identify the file. Some build systems play with
+  /// modification times of file so we can not trust them without using valid
+  /// unique idenifier like the UUID being valid.
+  bool IsValid() const { return m_uuid.hasValue(); }
 
   /// Check if two signatures are the same.
-  bool operator!=(const CacheSignature &rhs) {
-    if (m_uuid != rhs.m_uuid)
-      return true;
-    if (m_mod_time != rhs.m_mod_time)
-      return true;
-    if (m_obj_mod_time != rhs.m_obj_mod_time)
-      return true;
-    return false;
+  bool operator==(const CacheSignature &rhs) const {
+    return m_uuid == rhs.m_uuid && m_mod_time == rhs.m_mod_time &&
+           m_obj_mod_time == rhs.m_obj_mod_time;
   }
+
+  /// Check if two signatures 
diff er.
+  bool operator!=(const CacheSignature &rhs) const { return !(*this == rhs); }
   /// Encode this object into a data encoder object.
   ///
   /// This allows this object to be serialized to disk. The CacheSignature
@@ -149,7 +149,7 @@ struct CacheSignature {
   ///   True if a signature was encoded, and false if there were no member
   ///   variables that had value. False indicates this data should not be
   ///   cached to disk because we were unable to encode a valid signature.
-  bool Encode(DataEncoder &encoder);
+  bool Encode(DataEncoder &encoder) const;
 
   /// Decode a serialized version of this object from data.
   ///

diff  --git a/lldb/source/Core/DataFileCache.cpp b/lldb/source/Core/DataFileCache.cpp
index f9b632f34e417..5f8568fdb54f2 100644
--- a/lldb/source/Core/DataFileCache.cpp
+++ b/lldb/source/Core/DataFileCache.cpp
@@ -199,7 +199,7 @@ enum SignatureEncoding {
   eSignatureEnd = 255u,
 };
 
-bool CacheSignature::Encode(DataEncoder &encoder) {
+bool CacheSignature::Encode(DataEncoder &encoder) const {
   if (!IsValid())
     return false; // Invalid signature, return false!
 
@@ -240,10 +240,14 @@ bool CacheSignature::Decode(const lldb_private::DataExtractor &data,
     case eSignatureObjectModTime: {
       uint32_t mod_time = data.GetU32(offset_ptr);
       if (mod_time > 0)
-        m_mod_time = mod_time;
+        m_obj_mod_time = mod_time;
     } break;
     case eSignatureEnd:
-      return true;
+      // The definition of is valid changed to only be valid if the UUID is
+      // valid so make sure that if we attempt to decode an old cache file
+      // that we will fail to decode the cache file if the signature isn't
+      // considered valid.
+      return IsValid();
     default:
       break;
     }

diff  --git a/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py b/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py
index 5f6104f41c3c0..fdd7d7904f833 100644
--- a/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py
+++ b/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py
@@ -41,6 +41,25 @@ def get_module_cache_files(self, basename):
     @skipUnlessDarwin
     def test(self):
         """
+            This test has been modified to make sure .o files that don't have
+            UUIDs are not cached after discovering build systems that play with
+            modification times of .o files that the modification times are not
+            unique enough to ensure the .o file within the .a file are the right
+            files as this was causing older cache files to be accepted for new
+            updated .o files.
+
+            ELF .o files do calculate a UUID from the contents of the file,
+            which is expensive, but no one loads .o files into a debug sessions
+            when using ELF files. Mach-o .o files do not have UUID values and do
+            no calculate one as they _are_ used during debug sessions when no
+            dSYM file is generated. If we can find a way to uniquely and cheaply
+            create UUID values for mach-o .o files in the future, this test will
+            be updated to test this functionality. This test will now make sure
+            there are no cache entries for any .o files in BSD archives.
+
+            The old test case description is below in case we enable caching for
+            .o files again:
+
             Test module cache functionality for bsd archive object files.
 
             This will test that if we enable the module cache, we have a
@@ -76,10 +95,20 @@ def test(self):
         main_module.GetNumSymbols()
         a_o_cache_files = self.get_module_cache_files("libfoo.a(a.o)")
         b_o_cache_files = self.get_module_cache_files("libfoo.a(b.o)")
+
+
         # We expect the directory for a.o to have two cache directories:
         # - 1 for the a.o with a earlier mod time
         # - 1 for the a.o that was renamed from c.o that should be 2 seconds older
-        self.assertEqual(len(a_o_cache_files), 2,
-                "make sure there are two files in the module cache directory (%s) for libfoo.a(a.o)" % (self.cache_dir))
-        self.assertEqual(len(b_o_cache_files), 1,
-                "make sure there are two files in the module cache directory (%s) for libfoo.a(b.o)" % (self.cache_dir))
+        # self.assertEqual(len(a_o_cache_files), 2,
+        #         "make sure there are two files in the module cache directory (%s) for libfoo.a(a.o)" % (self.cache_dir))
+        # self.assertEqual(len(b_o_cache_files), 1,
+        #         "make sure there are two files in the module cache directory (%s) for libfoo.a(b.o)" % (self.cache_dir))
+
+        # We are no longer caching .o files in the lldb index cache. If we ever
+        # re-enable this functionality, we can uncomment out the above lines of
+        # code.
+        self.assertEqual(len(a_o_cache_files), 0,
+                "make sure there are no files in the module cache directory (%s) for libfoo.a(a.o)" % (self.cache_dir))
+        self.assertEqual(len(b_o_cache_files), 0,
+                "make sure there are no files in the module cache directory (%s) for libfoo.a(b.o)" % (self.cache_dir))

diff  --git a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
index 29514b5d1fcf5..0fb7f46068be7 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp
@@ -196,3 +196,75 @@ TEST(DWARFIndexCachingTest, ManualDWARFIndexIndexSetEncodeDecode) {
       DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset));
   EncodeDecode(set);
 }
+
+static void EncodeDecode(const CacheSignature &object, ByteOrder byte_order,
+                         bool encode_result) {
+  const uint8_t addr_size = 8;
+  DataEncoder encoder(byte_order, addr_size);
+  EXPECT_EQ(encode_result, object.Encode(encoder));
+  if (!encode_result)
+    return;
+  llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
+  DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size);
+  offset_t data_offset = 0;
+  CacheSignature decoded_object;
+  EXPECT_TRUE(decoded_object.Decode(data, &data_offset));
+  EXPECT_EQ(object, decoded_object);
+}
+
+static void EncodeDecode(const CacheSignature &object, bool encode_result) {
+  EncodeDecode(object, eByteOrderLittle, encode_result);
+  EncodeDecode(object, eByteOrderBig, encode_result);
+}
+
+TEST(DWARFIndexCachingTest, CacheSignatureTests) {
+  CacheSignature sig;
+  // A cache signature is only considered valid if it has a UUID.
+  sig.m_mod_time = 0x12345678;
+  EXPECT_FALSE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/false);
+  sig.Clear();
+
+  sig.m_obj_mod_time = 0x12345678;
+  EXPECT_FALSE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/false);
+  sig.Clear();
+
+  sig.m_uuid = UUID::fromData("@\x00\x11\x22\x33\x44\x55\x66\x77", 8);
+  EXPECT_TRUE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/true);
+  sig.m_mod_time = 0x12345678;
+  EXPECT_TRUE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/true);
+  sig.m_obj_mod_time = 0x456789ab;
+  EXPECT_TRUE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/true);
+  sig.m_mod_time = llvm::None;
+  EXPECT_TRUE(sig.IsValid());
+  EncodeDecode(sig, /*encode_result=*/true);
+
+  // Recent changes do not allow cache signatures with only a modification time
+  // or object modification time, so make sure if we try to decode such a cache
+  // file that we fail. This verifies that if we try to load an previously
+  // valid cache file where the signature is insufficient, that we will fail to
+  // decode and load these cache files.
+  DataEncoder encoder(eByteOrderLittle, /*addr_size=*/8);
+  encoder.AppendU8(2); // eSignatureModTime
+  encoder.AppendU32(0x12345678);
+  encoder.AppendU8(255); // eSignatureEnd
+
+  llvm::ArrayRef<uint8_t> bytes = encoder.GetData();
+  DataExtractor data(bytes.data(), bytes.size(), eByteOrderLittle,
+                     /*addr_size=*/8);
+  offset_t data_offset = 0;
+
+  // Make sure we fail to decode a CacheSignature with only a mod time
+  EXPECT_FALSE(sig.Decode(data, &data_offset));
+
+  // Change the signature data to contain only a eSignatureObjectModTime and
+  // make sure decoding fails as well.
+  encoder.PutU8(/*offset=*/0, 3); // eSignatureObjectModTime
+  data_offset = 0;
+  EXPECT_FALSE(sig.Decode(data, &data_offset));
+
+}


        


More information about the lldb-commits mailing list