[llvm] [BOLT][NFC] Speedup merge-fdata (PR #119942)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 13 16:53:08 PST 2024


https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/119942

Eliminate splitting the buffer into lines, and use `std::getline`
directly. Simplify no_lbr and boltedcollection handling as well.

Test Plan: For a large fdata file (200MB), the fstream version is ~10% faster.


>From 66f78b75f6d3045aa844b40079be326163964314 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 13 Dec 2024 16:52:57 -0800
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/test/merge-fdata-mixed-bat-no-lbr.test |  2 +-
 bolt/test/merge-fdata-mixed-mode.test       |  2 +-
 bolt/tools/merge-fdata/merge-fdata.cpp      | 61 +++++++++------------
 3 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/bolt/test/merge-fdata-mixed-bat-no-lbr.test b/bolt/test/merge-fdata-mixed-bat-no-lbr.test
index b59773d2d5054f..eeb3a0e23b0cca 100644
--- a/bolt/test/merge-fdata-mixed-bat-no-lbr.test
+++ b/bolt/test/merge-fdata-mixed-bat-no-lbr.test
@@ -5,7 +5,7 @@
 # RUN: split-file %s %t
 # RUN: not merge-fdata %t/a.fdata %t/b.fdata 2>&1 | FileCheck %s
 
-# CHECK: cannot mix profile collected in BOLT and non-BOLT deployments
+# CHECK: cannot mix profile with and without boltedcollection
 
 #--- a.fdata
 boltedcollection
diff --git a/bolt/test/merge-fdata-mixed-mode.test b/bolt/test/merge-fdata-mixed-mode.test
index 3e1a3bfcb94542..f897fec5d9db47 100644
--- a/bolt/test/merge-fdata-mixed-mode.test
+++ b/bolt/test/merge-fdata-mixed-mode.test
@@ -6,7 +6,7 @@
 # RUN: split-file %s %t
 # RUN: not merge-fdata %t/a.fdata %t/b.fdata 2>&1 | FileCheck %s
 
-# CHECK: cannot mix 'no_lbr' and 'lbr' profiles.
+# CHECK: cannot mix profile with and without no_lbr
 
 #--- a.fdata
 no_lbr
diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp
index 39f58a7e8800e0..6b428e87ae24c7 100644
--- a/bolt/tools/merge-fdata/merge-fdata.cpp
+++ b/bolt/tools/merge-fdata/merge-fdata.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadPool.h"
 #include <algorithm>
+#include <fstream>
 #include <mutex>
 #include <unordered_map>
 
@@ -274,52 +275,40 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
 
     if (isYAML(Filename))
       report_error(Filename, "cannot mix YAML and legacy formats");
-    ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
-        MemoryBuffer::getFileOrSTDIN(Filename);
-    if (std::error_code EC = MB.getError())
-      report_error(Filename, EC);
 
-    StringRef Buf = MB.get()->getBuffer();
+    std::ifstream FdataFile(Filename, std::ios::in);
+    std::string FdataLine;
+
+    auto checkMode = [&](const std::string &Key, std::optional<bool> &Flag) {
+      std::string ErrorMsg = "cannot mix profile with and without " + Key;
+      auto Pos = FdataFile.tellg();
+      std::getline(FdataFile, FdataLine);
+      if (FdataLine.rfind(Key, 0) == 0) {
+        if (!Flag.value_or(true))
+          report_error(Filename, ErrorMsg);
+        Flag = true;
+      } else {
+        if (Flag.value_or(false))
+          report_error(Filename, ErrorMsg);
+        Flag = false;
+        // Rewind line
+        FdataFile.seekg(Pos);
+      }
+    };
+
     ProfileTy *Profile;
     {
       std::lock_guard<std::mutex> Lock(BoltedCollectionMutex);
       // Check if the string "boltedcollection" is in the first line
-      if (Buf.starts_with("boltedcollection\n")) {
-        if (!BoltedCollection.value_or(true))
-          report_error(
-              Filename,
-              "cannot mix profile collected in BOLT and non-BOLT deployments");
-        BoltedCollection = true;
-        Buf = Buf.drop_front(17);
-      } else {
-        if (BoltedCollection.value_or(false))
-          report_error(
-              Filename,
-              "cannot mix profile collected in BOLT and non-BOLT deployments");
-        BoltedCollection = false;
-      }
+      checkMode("boltedcollection", BoltedCollection);
       // Check if the string "no_lbr" is in the first line
       // (or second line if BoltedCollection is true)
-      size_t CheckNoLBRPos = Buf.find('\n');
-      if (CheckNoLBRPos != StringRef::npos) {
-        StringRef FirstLine = Buf.substr(0, CheckNoLBRPos);
-        if (FirstLine.contains("no_lbr")) {
-          if (!NoLBRCollection.value_or(true))
-            report_error(Filename, "cannot mix 'no_lbr' and 'lbr' profiles");
-          NoLBRCollection = true;
-          Buf = Buf.drop_front(CheckNoLBRPos + 1);
-        } else {
-          if (NoLBRCollection.value_or(false))
-            report_error(Filename, "cannot mix 'no_lbr' and 'lbr' profiles");
-          NoLBRCollection = false;
-        }
-      }
+      checkMode("no_lbr", NoLBRCollection);
       Profile = &Profiles[tid];
     }
 
-    SmallVector<StringRef> Lines;
-    SplitString(Buf, Lines, "\n");
-    for (StringRef Line : Lines) {
+    while (std::getline(FdataFile, FdataLine)) {
+      StringRef Line(FdataLine);
       size_t Pos = Line.rfind(" ");
       if (Pos == StringRef::npos)
         report_error(Filename, "Malformed / corrupted profile");



More information about the llvm-commits mailing list