[llvm] [Propeller] CFG drift detect (PR #80431)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 2 05:16:37 PST 2024


https://github.com/lifengxiang1025 created https://github.com/llvm/llvm-project/pull/80431

Currently, Propeller can only detect code drift when it is used together with PGO. This patch adds the ability to detect CFG drift.

>From 480dc02c79cc46cea892303b82ec308dbe9d6fcf Mon Sep 17 00:00:00 2001
From: lifengxiang <lifengxiang.1025 at bytedance.com>
Date: Fri, 2 Feb 2024 17:55:57 +0800
Subject: [PATCH] [Propeller] CFG drift detect

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |  2 +-
 llvm/lib/CodeGen/BasicBlockSections.cpp       | 53 +++++++++++++++
 .../basic-block-sections-cfg-drift-detect.ll  | 67 +++++++++++++++++++
 3 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cfg-drift-detect.ll

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index dc2035fa598c4..c3d8f14313d0f 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -659,7 +659,7 @@ class MachineBasicBlock
 
   /// Sets the fixed BBID of this basic block.
   void setBBID(const UniqueBBID &V) {
-    assert(!BBID.has_value() && "Cannot change BBID.");
+    assert((!BBID.has_value() || BBID->BaseID == 0) && "Cannot change BBID.");
     BBID = V;
   }
 
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index eb3f9e7078f1a..4ca278f405d54 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -79,6 +79,8 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Target/TargetMachine.h"
 #include <optional>
+#include <queue>
+#include <unordered_set>
 
 using namespace llvm;
 
@@ -97,6 +99,17 @@ static cl::opt<bool> BBSectionsDetectSourceDrift(
              "mismatch for this function"),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> BBSectionsDetectCFGDrift(
+    "bbsections-detect-cfg-drift",
+    cl::desc("This checks if function's cfg hash mismatch "
+             "with cfg hash recorded in profile"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+    FirstMBBContainCFGHash("first-mbb-contain-cfg-hash",
+                           cl::desc("Use cfg hash as first mbb base id"),
+                           cl::init(false), cl::Hidden);
+
 namespace {
 
 class BasicBlockSections : public MachineFunctionPass {
@@ -284,9 +297,44 @@ bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) {
   return false;
 }
 
+static uint64_t getCFGMD5(MachineFunction &MF) {
+  std::unordered_set<MachineBasicBlock *> Visited;
+  MD5 Hash;
+  std::queue<MachineBasicBlock *> Q;
+  if (!MF.empty()) {
+    Q.push(&*MF.begin());
+    Visited.insert(&*MF.begin());
+  }
+  while (!Q.empty()) {
+    MachineBasicBlock *Now = Q.front();
+    Q.pop();
+    using namespace llvm::support;
+    uint64_t Value = endian::byte_swap<uint32_t, llvm::endianness::little>(
+        Now->getBBID()->BaseID);
+    Hash.update(llvm::ArrayRef((uint8_t *)&Value, sizeof(Value)));
+    for (auto Iter = Now->succ_begin(); Iter != Now->succ_end(); Iter++) {
+      if (Visited.count(&**Iter)) {
+        continue;
+      }
+      Q.push(&**Iter);
+      Visited.insert(&**Iter);
+    }
+  }
+  llvm::MD5::MD5Result Result;
+  Hash.final(Result);
+  return Result.low();
+}
+
 // Identify, arrange, and modify basic blocks which need separate sections
 // according to the specification provided by the -fbasic-block-sections flag.
 bool BasicBlockSections::handleBBSections(MachineFunction &MF) {
+  unsigned CFGHash = 0;
+  if (BBSectionsDetectCFGDrift) {
+    CFGHash = static_cast<unsigned>(getCFGMD5(MF));
+    if (FirstMBBContainCFGHash)
+      MF.begin()->setBBID({CFGHash, 0});
+  }
+
   auto BBSectionsType = MF.getTarget().getBBSectionsType();
   if (BBSectionsType == BasicBlockSection::None)
     return false;
@@ -316,6 +364,11 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) {
             .getClusterInfoForFunction(MF.getName());
     if (!HasProfile)
       return false;
+    // Check for cfg drift.
+    if (BBSectionsDetectCFGDrift &&
+        ClusterInfo.begin()->BBID.BaseID != CFGHash) {
+      return false;
+    }
     for (auto &BBClusterInfo : ClusterInfo) {
       FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo);
     }
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cfg-drift-detect.ll b/llvm/test/CodeGen/X86/basic-block-sections-cfg-drift-detect.ll
new file mode 100644
index 0000000000000..4e44c0bc6128a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cfg-drift-detect.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels -first-mbb-contain-cfg-hash=true -bbsections-detect-cfg-drift=true | FileCheck %s -check-prefix=HASH
+
+; RUN: echo '!_Z3fooi' > %t1
+; RUN: echo '!!2561660837 2' >> %t1
+; RUN: echo '!!1' >> %t1
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -first-mbb-contain-cfg-hash=true -bbsections-detect-cfg-drift=true | FileCheck %s -check-prefix=MATCH
+
+; RUN: echo '!_Z3fooi' > %t1
+; RUN: echo '!!11111111 2' >> %t1
+; RUN: echo '!!1' >> %t1
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -first-mbb-contain-cfg-hash=true -bbsections-detect-cfg-drift=true | FileCheck %s -check-prefix=MISMATCH
+
+; HASH: .ascii	"\245\257\277\305\t"            # BB id
+; MATCH: callq	_Z3bazi at PLT
+; MATCH: callq	_Z3bari at PLT
+; MISMATCH: callq	_Z3bari at PLT
+; MISMATCH: callq	_Z3bazi at PLT
+
+; ModuleID = 'foo.cc'
+source_filename = "foo.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef i32 @_Z3fooi(i32 noundef %a) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, ptr %a.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %and = and i32 %0, 1
+  %tobool = icmp ne i32 %and, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, ptr %a.addr, align 4
+  %call = call noundef i32 @_Z3bari(i32 noundef %1)
+  store i32 %call, ptr %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %2 = load i32, ptr %a.addr, align 4
+  %call1 = call noundef i32 @_Z3bazi(i32 noundef %2)
+  store i32 %call1, ptr %retval, align 4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32, ptr %retval, align 4
+  ret i32 %3
+}
+
+declare noundef i32 @_Z3bari(i32 noundef) #1
+
+declare noundef i32 @_Z3bazi(i32 noundef) #1
+
+attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!"clang version 18.0.0"}



More information about the llvm-commits mailing list