[llvm] Unittests and usability for BitstreamWriter incremental flushing (PR #92983)

Wed May 29 14:18:59 PDT 2024

================
@@ -77,47 +86,106 @@ class BitstreamWriter {
   void WriteWord(unsigned Value) {
     Value =
         support::endian::byte_swap<uint32_t, llvm::endianness::little>(Value);
-    Out.append(reinterpret_cast<const char *>(&Value),
-               reinterpret_cast<const char *>(&Value + 1));
+    Buffer.append(reinterpret_cast<const char *>(&Value),
+                  reinterpret_cast<const char *>(&Value + 1));
   }
 
-  uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
+  uint64_t GetNumOfFlushedBytes() const {
+    return fdStream() ? fdStream()->tell() : 0;
+  }
 
-  size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); }
+  size_t GetBufferOffset() const {
+    return Buffer.size() + GetNumOfFlushedBytes();
+  }
 
   size_t GetWordIndex() const {
     size_t Offset = GetBufferOffset();
     assert((Offset & 3) == 0 && "Not 32-bit aligned");
     return Offset / 4;
   }
 
-  /// If the related file stream supports reading, seeking and writing, flush
-  /// the buffer if its size is above a threshold.
-  void FlushToFile() {
-    if (!FS)
+  void flushAndClear() {
+    assert(FS);
+    assert(!Buffer.empty());
+    assert(!BlockFlushingStartPos &&
+           "a call to markAndBlockFlushing should have been paired with a "
+           "call to getMarkedBufferAndResumeFlushing");
+    FS->write(Buffer.data(), Buffer.size());
+    Buffer.clear();
+  }
+
+  /// If the related file stream is a raw_fd_stream, flush the buffer if its
+  /// size is above a threshold. If \p OnClosing is true, flushing happens
+  /// regardless of thresholds.
+  void FlushToFile(bool OnClosing = false) {
+    if (!FS || Buffer.empty())
       return;
-    if (Out.size() < FlushThreshold)
+    if (OnClosing)
+      return flushAndClear();
+    if (BlockFlushingStartPos)
       return;
-    FS->write((char *)&Out.front(), Out.size());
-    Out.clear();
+    if (fdStream() && Buffer.size() > FlushThreshold)
+      flushAndClear();
+  }
+
+  raw_fd_stream *fdStream() { return dyn_cast_or_null<raw_fd_stream>(FS); }
+
+  const raw_fd_stream *fdStream() const {
+    return dyn_cast_or_null<raw_fd_stream>(FS);
+  }
+
+  SmallVectorImpl<char> &getInternalBufferFromStream(raw_ostream &OutStream) {
+    if (auto *SV = dyn_cast<raw_svector_ostream>(&OutStream))
+      return SV->buffer();
+    return OwnBuffer;
   }
 
 public:
-  /// Create a BitstreamWriter that writes to Buffer \p O.
+  /// Create a BitstreamWriter over a raw_ostream \p OutStream.
+  /// If \p OutStream is a raw_svector_ostream, the BitstreamWriter will write
+  /// directly to the latter's buffer. In all other cases, the BitstreamWriter
+  /// will use an internal buffer and flush at the end of its lifetime.
   ///
-  /// \p FS is the file stream that \p O flushes to incrementally. If \p FS is
-  /// null, \p O does not flush incrementially, but writes to disk at the end.
+  /// In addition, if \p OutStream is a raw_fd_stream supporting seek, tell, and
+  /// read (besides write), the BitstreamWriter will also flush incrementally,
+  /// when a subblock is finished and the buffer size exceeds FlushThreshold.
   ///
-  /// \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is
-  /// valid. Flushing only occurs at (sub)block boundaries.
-  BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
-                  uint32_t FlushThreshold = 512)
-      : Out(O), FS(FS), FlushThreshold(uint64_t(FlushThreshold) << 20), CurBit(0),
-        CurValue(0), CurCodeSize(2) {}
+  /// NOTE: \p FlushThreshold's unit is MB.
+  BitstreamWriter(raw_ostream &OutStream, uint32_t FlushThreshold = 512)
+      : Buffer(getInternalBufferFromStream(OutStream)),
+        FS(!isa<raw_svector_ostream>(OutStream) ? &OutStream : nullptr),
----------------
mtrofin wrote:

Yes, because a raw_svector_ostream is really a buffer dressed as a stream, so the user's intention would be to capture the bytes in that buffer.

We also special-case raw_fd_stream for the incremental flushing scenario.

https://github.com/llvm/llvm-project/pull/92983