[clang-tools-extra] d7747da - [clangd] Also detect corrupt stri table size.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 19 11:11:33 PST 2020


Author: Sam McCall
Date: 2020-11-19T20:11:14+01:00
New Revision: d7747dacba8e4a7784ea8ef20abbff87d5681b81

URL: https://github.com/llvm/llvm-project/commit/d7747dacba8e4a7784ea8ef20abbff87d5681b81
DIFF: https://github.com/llvm/llvm-project/commit/d7747dacba8e4a7784ea8ef20abbff87d5681b81.diff

LOG: [clangd] Also detect corrupt stri table size.

Differential Revision: https://reviews.llvm.org/D91299

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/Serialization.cpp
    clang-tools-extra/clangd/unittests/SerializationTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index 5d48edf81343..bba5eaa36754 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -223,6 +223,15 @@ llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
   if (UncompressedSize == 0) // No compression
     Uncompressed = R.rest();
   else if (llvm::zlib::isAvailable()) {
+    // Don't allocate a massive buffer if UncompressedSize was corrupted
+    // This is effective for sharded index, but not big monolithic ones, as
+    // once compressed size reaches 4MB nothing can be ruled out.
+    // Theoretical max ratio from https://zlib.net/zlib_tech.html
+    constexpr int MaxCompressionRatio = 1032;
+    if (UncompressedSize / MaxCompressionRatio > R.rest().size())
+      return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
+                   R.rest().size(), UncompressedSize);
+
     if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
                                                UncompressedSize))
       return std::move(E);

diff  --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
index 68d1d1a0df8b..c4995cd0de19 100644
--- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
@@ -14,6 +14,7 @@
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
@@ -383,6 +384,46 @@ TEST(SerializationTest, NoCrashOnBadArraySize) {
   EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
             "malformed or truncated include uri");
 }
+
+// Check we detect invalid string table size size without allocating it first.
+// If this detection fails, the test should allocate a huge array and crash.
+TEST(SerializationTest, NoCrashOnBadStringTableSize) {
+  if (!llvm::zlib::isAvailable()) {
+    log("skipping test, no zlib");
+    return;
+  }
+
+  // First, create a valid serialized file.
+  auto In = readIndexFile(YAML);
+  ASSERT_FALSE(!In) << In.takeError();
+  IndexFileOut Out(*In);
+  Out.Format = IndexFileFormat::RIFF;
+  std::string Serialized = llvm::to_string(Out);
+
+  // Low-level parse it again, we're going to replace the `stri` chunk.
+  auto Parsed = riff::readFile(Serialized);
+  ASSERT_FALSE(!Parsed) << Parsed.takeError();
+  auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
+    return C.ID == riff::fourCC("stri");
+  });
+  ASSERT_NE(Stri, Parsed->Chunks.end());
+
+  // stri consists of an 8 byte uncompressed-size, and then compressed data.
+  // We'll claim our small amount of data expands to 4GB
+  std::string CorruptStri =
+      (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
+  Stri->Data = CorruptStri;
+  std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
+
+  // Try to crash rather than hang on large allocation.
+  ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
+
+  std::string CorruptFile = llvm::to_string(*Parsed);
+  auto CorruptParsed = readIndexFile(CorruptFile);
+  ASSERT_TRUE(!CorruptParsed);
+  EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
+              testing::HasSubstr("bytes is implausible"));
+}
 #endif
 
 } // namespace


        


More information about the cfe-commits mailing list