[llvm-branch-commits] [clang-tools-extra] [clang-doc] Simplify parsing and reading bitcode blocks (PR #190053)

Paul Kirth via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Apr 2 22:51:39 PDT 2026


https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/190053

>From d5d616d87b9a1e08ea156d55ca60a19e0369e82c Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Fri, 27 Mar 2026 19:33:33 +0000
Subject: [PATCH] [clang-doc] Simplify parsing and reading bitcode blocks

Much of the logic in the readBlock implementation is boilerplate, and is
repeated for each implementation/specialization. This will become much
worse as we introduce new custom block reading logic as we migrate
towards arena allocation. In preparation for that, we're introducing the
change in logic now, which should make later refactoring much more
straightforward.
---
 clang-tools-extra/clang-doc/BitcodeReader.cpp | 223 ++++++++----------
 clang-tools-extra/clang-doc/BitcodeReader.h   |   5 +
 clang-tools-extra/clang-doc/Representation.h  |   2 +-
 3 files changed, 109 insertions(+), 121 deletions(-)

diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp b/clang-tools-extra/clang-doc/BitcodeReader.cpp
index a3a73235cfbdf..b7f4d6aa7ba23 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp
@@ -379,17 +379,15 @@ static llvm::Error parseRecord(const Record &R, unsigned ID,
   }
 }
 
-template <>
-llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
+template <typename T, typename BlockBeginHandler, typename BlockEndHandler,
+          typename RecordHandler>
+llvm::Error
+ClangDocBitcodeReader::parseBlock(unsigned ID, T I, BlockBeginHandler &&BBH,
+                                  BlockEndHandler &&BEH, RecordHandler &&RH) {
   llvm::TimeTraceScope("Reducing infos", "readBlock");
   if (llvm::Error Err = Stream.EnterSubBlock(ID))
     return Err;
 
-  llvm::SmallVector<CommentInfo> LocalChildren;
-  llvm::SmallVector<StringRef> AttrKeys;
-  llvm::SmallVector<StringRef> AttrValues;
-  llvm::SmallVector<StringRef> Args;
-
   while (true) {
     unsigned BlockOrCode = 0;
     llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
@@ -400,62 +398,89 @@ llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
     case Cursor::BadBlock:
       return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                      "bad block found");
-    case Cursor::BlockEnd: {
-      if (!LocalChildren.empty())
-        I->Children = allocateArray<CommentInfo>(LocalChildren, TransientArena);
-      if (!AttrKeys.empty()) {
-        StringRef *KeysMem =
-            TransientArena.Allocate<StringRef>(AttrKeys.size());
-        std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
-        I->AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
-      }
-      if (!AttrValues.empty()) {
-        StringRef *ValuesMem =
-            TransientArena.Allocate<StringRef>(AttrValues.size());
-        std::uninitialized_copy(AttrValues.begin(), AttrValues.end(),
-                                ValuesMem);
-        I->AttrValues = llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
-      }
-      if (!Args.empty()) {
-        StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
-        std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
-        I->Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
-      }
+    case Cursor::BlockEnd:
+      if (llvm::Error Err = BEH())
+        return Err;
       return llvm::Error::success();
-    }
-    case Cursor::BlockBegin:
-      if (BlockOrCode == BI_COMMENT_BLOCK_ID) {
-        CommentInfo Child;
-        if (llvm::Error Err = readBlock(BlockOrCode, &Child)) {
-          if (llvm::Error Skipped = Stream.SkipBlock())
-            return joinErrors(std::move(Err), std::move(Skipped));
-          return Err;
-        }
-        LocalChildren.push_back(std::move(Child));
-      } else {
-        if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-          if (llvm::Error Skipped = Stream.SkipBlock())
-            return joinErrors(std::move(Err), std::move(Skipped));
-          return Err;
-        }
+    case Cursor::BlockBegin: {
+      llvm::Expected<bool> Handled = BBH(BlockOrCode);
+      if (!Handled)
+        return Handled.takeError();
+      if (*Handled)
+        continue;
+
+      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
+        if (llvm::Error Skipped = Stream.SkipBlock())
+          return joinErrors(std::move(Err), std::move(Skipped));
+        return Err;
       }
       continue;
+    }
     case Cursor::Record:
       break;
     }
 
-    Record R;
-    llvm::StringRef Blob;
-    llvm::Expected<unsigned> MaybeRecID =
-        Stream.readRecord(BlockOrCode, R, &Blob);
-    if (!MaybeRecID)
-      return MaybeRecID.takeError();
-    if (llvm::Error Err = parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys,
-                                      AttrValues, Args))
+    if (llvm::Error Err = RH(BlockOrCode))
       return Err;
   }
 }
 
+template <>
+llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
+  llvm::SmallVector<CommentInfo> LocalChildren;
+  llvm::SmallVector<StringRef> AttrKeys;
+  llvm::SmallVector<StringRef> AttrValues;
+  llvm::SmallVector<StringRef> Args;
+
+  return parseBlock(
+      ID, I,
+      [&](unsigned BlockOrCode) -> llvm::Expected<bool> {
+        if (BlockOrCode == BI_COMMENT_BLOCK_ID) {
+          CommentInfo Child;
+          if (llvm::Error Err = readBlock(BlockOrCode, &Child))
+            return std::move(Err);
+          LocalChildren.push_back(std::move(Child));
+          return true;
+        }
+        return false;
+      },
+      [&]() -> llvm::Error {
+        if (!LocalChildren.empty())
+          I->Children =
+              allocateArray<CommentInfo>(LocalChildren, TransientArena);
+        if (!AttrKeys.empty()) {
+          StringRef *KeysMem =
+              TransientArena.Allocate<StringRef>(AttrKeys.size());
+          std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
+          I->AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
+        }
+        if (!AttrValues.empty()) {
+          StringRef *ValuesMem =
+              TransientArena.Allocate<StringRef>(AttrValues.size());
+          std::uninitialized_copy(AttrValues.begin(), AttrValues.end(),
+                                  ValuesMem);
+          I->AttrValues =
+              llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
+        }
+        if (!Args.empty()) {
+          StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
+          std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
+          I->Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
+        }
+        return llvm::Error::success();
+      },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        Record R;
+        llvm::StringRef Blob;
+        llvm::Expected<unsigned> MaybeRecID =
+            Stream.readRecord(BlockOrCode, R, &Blob);
+        if (!MaybeRecID)
+          return MaybeRecID.takeError();
+        return parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys, AttrValues,
+                           Args);
+      });
+}
+
 static llvm::Error parseRecord(const Record &R, unsigned ID,
                                llvm::StringRef Blob, Reference *I, FieldId &F) {
   switch (ID) {
@@ -929,80 +954,38 @@ llvm::Error ClangDocBitcodeReader::readRecord(unsigned ID, Reference *I) {
 // Read a block of records into a single info.
 template <typename T>
 llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, T I) {
-  llvm::TimeTraceScope("Reducing infos", "readBlock");
-  if (llvm::Error Err = Stream.EnterSubBlock(ID))
-    return Err;
-
-  while (true) {
-    unsigned BlockOrCode = 0;
-    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
-    if (!C)
-      return C.takeError();
-
-    switch (*C) {
-    case Cursor::BadBlock:
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "bad block found");
-    case Cursor::BlockEnd:
-      return llvm::Error::success();
-    case Cursor::BlockBegin:
-      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-        if (llvm::Error Skipped = Stream.SkipBlock())
-          return joinErrors(std::move(Err), std::move(Skipped));
-        return Err;
-      }
-      continue;
-    case Cursor::Record:
-      break;
-    }
-    if (auto Err = readRecord(BlockOrCode, I))
-      return Err;
-  }
+  return parseBlock(
+      ID, I, [](unsigned BlockOrCode) -> llvm::Expected<bool> { return false; },
+      []() -> llvm::Error { return llvm::Error::success(); },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        return readRecord(BlockOrCode, I);
+      });
 }
 
 template <>
 llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, FriendInfo *I) {
-  llvm::TimeTraceScope("Reducing infos", "readBlock");
-  if (llvm::Error Err = Stream.EnterSubBlock(ID))
-    return Err;
-
   llvm::SmallVector<FieldTypeInfo, 4> LocalParams;
 
-  while (true) {
-    unsigned BlockOrCode = 0;
-    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
-    if (!C)
-      return C.takeError();
-
-    switch (*C) {
-    case Cursor::BadBlock:
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "bad block found");
-    case Cursor::BlockEnd: {
-      if (!LocalParams.empty())
-        I->Params = allocateArray<FieldTypeInfo>(LocalParams, TransientArena);
-      return llvm::Error::success();
-    }
-    case Cursor::BlockBegin:
-      if (BlockOrCode == BI_FIELD_TYPE_BLOCK_ID) {
-        FieldTypeInfo FI;
-        if (auto Err = readBlock(BlockOrCode, &FI))
-          return Err;
-        LocalParams.push_back(std::move(FI));
-        continue;
-      }
-      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-        if (llvm::Error Skipped = Stream.SkipBlock())
-          return joinErrors(std::move(Err), std::move(Skipped));
-        return Err;
-      }
-      continue;
-    case Cursor::Record:
-      break;
-    }
-    if (auto Err = readRecord(BlockOrCode, I))
-      return Err;
-  }
+  return parseBlock(
+      ID, I,
+      [&](unsigned BlockOrCode) -> llvm::Expected<bool> {
+        if (BlockOrCode == BI_FIELD_TYPE_BLOCK_ID) {
+          FieldTypeInfo FI;
+          if (auto Err = readBlock(BlockOrCode, &FI))
+            return std::move(Err);
+          LocalParams.push_back(std::move(FI));
+          return true;
+        }
+        return false;
+      },
+      [&]() -> llvm::Error {
+        if (!LocalParams.empty())
+          I->Params = allocateArray<FieldTypeInfo>(LocalParams, TransientArena);
+        return llvm::Error::success();
+      },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        return readRecord(BlockOrCode, I);
+      });
 }
 
 // TODO: fix inconsistentent returning of errors in add callbacks.
diff --git a/clang-tools-extra/clang-doc/BitcodeReader.h b/clang-tools-extra/clang-doc/BitcodeReader.h
index 7516081e3f842..d3499fdee0f5d 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.h
+++ b/clang-tools-extra/clang-doc/BitcodeReader.h
@@ -45,6 +45,11 @@ class ClangDocBitcodeReader {
   // record found.
   template <typename T> llvm::Error readBlock(unsigned ID, T I);
 
+  template <typename T, typename BlockBeginHandler, typename BlockEndHandler,
+            typename RecordHandler>
+  llvm::Error parseBlock(unsigned ID, T I, BlockBeginHandler &&BBH,
+                         BlockEndHandler &&BEH, RecordHandler &&RH);
+
   // Step through a block of records to find the next data field.
   template <typename T> llvm::Error readSubBlock(unsigned ID, T I);
 
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index 9208b77fc8606..ffcb60f7ab67a 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -799,4 +799,4 @@ static_assert(!std::is_trivially_destructible_v<VarInfo>);
 } // namespace doc
 } // namespace clang
 
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
\ No newline at end of file



More information about the llvm-branch-commits mailing list