[llvm-branch-commits] [clang] 54e03d0 - [PGO] Verify BFI counts after loading profile data
Rong Xu via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 14 16:20:52 PST 2020
Author: Rong Xu
Date: 2020-12-14T15:56:10-08:00
New Revision: 54e03d03a7a4d47f09d40bcbcfe484066a52a077
URL: https://github.com/llvm/llvm-project/commit/54e03d03a7a4d47f09d40bcbcfe484066a52a077
DIFF: https://github.com/llvm/llvm-project/commit/54e03d03a7a4d47f09d40bcbcfe484066a52a077.diff
LOG: [PGO] Verify BFI counts after loading profile data
This patch adds the functionality to compare BFI counts with real profile
counts right after reading the profile. It will print remarks under
-Rpass-analysis=pgo, or the internal option -pass-remarks-analysis=pgo.
Differential Revision: https://reviews.llvm.org/D91813
Added:
llvm/test/Transforms/PGOProfile/Inputs/bfi_verification.proftext
llvm/test/Transforms/PGOProfile/bfi_verification.ll
Modified:
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/CodeGenModule.cpp
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 73194be922dd..ced287643c28 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1943,6 +1943,11 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (TargetDecl->hasAttr<ColdAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Cold);
+ if (TargetDecl->hasAttr<HotAttr>()) {
+ // xur
+ fprintf(stderr, "hihi 2\n");
+ // FuncAttrs.addAttribute(llvm::Attribute::Hot);
+ }
if (TargetDecl->hasAttr<NoDuplicateAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 0bb9c91f2434..dec0cba84343 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1744,6 +1744,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::OptimizeForSize);
B.addAttribute(llvm::Attribute::Cold);
}
+ if (D->hasAttr<HotAttr>()) {
+ if (!ShouldAddOptNone)
+ B.addAttribute(llvm::Attribute::OptimizeForSize);
+ // xur
+ // B.addAttribute(llvm::Attribute::Hot);
+ fprintf(stderr, "hihi 1\n");
+ }
if (D->hasAttr<MinSizeAttr>())
B.addAttribute(llvm::Attribute::MinSize);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 0228c8a8ef14..eba8d9e9c3c3 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -252,6 +252,30 @@ static cl::opt<bool> PGOInstrumentEntry(
"pgo-instrument-entry", cl::init(false), cl::Hidden,
cl::desc("Force to instrument function entry basicblock."));
+// Command line options controlling the BFI verification performed right
+// after the profile counts have been read. The results are reported as
+// analysis remarks.
+
+// Restrict the verification to hot/cold classification changes: a block is
+// reported only when its raw profile count and its BFI count fall on
+// different sides of the hot/cold thresholds.
+static cl::opt<bool> PGOVerifyHotBFI(
+    "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
+    cl::desc("Print out the non-match BFI count if a hot raw profile count "
+             "becomes non-hot, or a cold raw profile count becomes hot. "
+             "The print is enabled under -Rpass-analysis=pgo, or "
+             "internal option -pass-remarks-analysis=pgo."));
+
+// Report every block whose BFI count differs from its raw profile count by
+// more than the ratio configured with -pgo-verify-bfi-ratio.
+static cl::opt<bool> PGOVerifyBFI(
+    "pgo-verify-bfi", cl::init(false), cl::Hidden,
+    cl::desc("Print out mismatched BFI counts after setting profile metadata. "
+             "The print is enabled under -Rpass-analysis=pgo, or "
+             "internal option -pass-remarks-analysis=pgo."));
+
+// Tolerated difference, in percent of the raw profile count, before a block
+// is reported by -pgo-verify-bfi.
+static cl::opt<unsigned> PGOVerifyBFIRatio(
+    "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
+    cl::desc("Set the threshold for pgo-verify-bfi -- only print out "
+             "mismatched BFI if the difference percentage is greater than "
+             "this value (in percentage)."));
+
+// Blocks are ignored by -pgo-verify-bfi when both the raw profile count and
+// the BFI count are below this value.
+static cl::opt<unsigned> PGOVerifyBFICutoff(
+    "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
+    cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
+             "profile count value is below."));
+
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
extern cl::opt<PGOViewCountsType> PGOViewCounts;
@@ -1616,6 +1640,82 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
return PreservedAnalyses::none();
}
+// Compare the profile count values with BFI count values, and print out
+// the non-matching ones.
+//
+// A BlockFrequencyInfo is built for the function of \p Func from \p NBPI and
+// \p LI, and the count it yields for each basic block is compared with the
+// count recorded in \p Func (blocks without a valid count are treated as 0).
+//
+// Two modes are supported:
+//  * -pgo-verify-hot-bfi: a block is reported when its raw count is at least
+//    \p HotCountThreshold but its BFI count is not, or when its raw count is
+//    at most \p ColdCountThreshold but its BFI count is at least
+//    \p HotCountThreshold.
+//  * otherwise: a block is reported when the absolute difference of the two
+//    counts is greater than -pgo-verify-bfi-ratio percent of the raw count;
+//    blocks whose two counts are both below -pgo-verify-bfi-cutoff are
+//    skipped.
+//
+// One analysis remark is emitted per reported block, followed by a summary
+// remark for the function when at least one block was reported.
+static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
+                          BranchProbabilityInfo &NBPI,
+                          uint64_t HotCountThreshold,
+                          uint64_t ColdCountThreshold) {
+  Function &F = Func.getFunc();
+  BlockFrequencyInfo NBFI(F, NBPI, LI);
+  bool HotBBOnly = PGOVerifyHotBFI;
+  // Only set in the hot/cold mode; stays empty otherwise so that no suffix is
+  // appended to the per-block remark.
+  std::string Msg;
+  OptimizationRemarkEmitter ORE(&F);
+
+  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
+  for (auto &BBI : F) {
+    uint64_t CountValue = 0;
+    uint64_t BFICountValue = 0;
+
+    if (Func.getBBInfo(&BBI).CountValid)
+      CountValue = Func.getBBInfo(&BBI).CountValue;
+
+    BBNum++;
+    if (CountValue)
+      NonZeroBBNum++;
+    auto BFICount = NBFI.getBlockProfileCount(&BBI);
+    if (BFICount)
+      BFICountValue = BFICount.getValue();
+
+    if (HotBBOnly) {
+      bool rawIsHot = CountValue >= HotCountThreshold;
+      bool BFIIsHot = BFICountValue >= HotCountThreshold;
+      bool rawIsCold = CountValue <= ColdCountThreshold;
+      bool ShowCount = false;
+      if (rawIsHot && !BFIIsHot) {
+        Msg = "raw-Hot to BFI-nonHot";
+        ShowCount = true;
+      } else if (rawIsCold && BFIIsHot) {
+        Msg = "raw-Cold to BFI-Hot";
+        ShowCount = true;
+      }
+      if (!ShowCount)
+        continue;
+    } else {
+      if ((CountValue < PGOVerifyBFICutoff) &&
+          (BFICountValue < PGOVerifyBFICutoff))
+        continue;
+      uint64_t Diff = (BFICountValue >= CountValue)
+                          ? BFICountValue - CountValue
+                          : CountValue - BFICountValue;
+      // Only a difference strictly greater than the allowed ratio is a
+      // mismatch. Using <= also keeps identical counts from being reported
+      // when CountValue < 100, where the integer threshold rounds down to 0.
+      if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
+        continue;
+    }
+    BBMisMatchNum++;
+
+    ORE.emit([&]() {
+      OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
+                                        F.getSubprogram(), &BBI);
+      Remark << "BB " << ore::NV("Block", BBI.getName())
+             << " Count=" << ore::NV("Count", CountValue)
+             << " BFI_Count=" << ore::NV("Count", BFICountValue);
+      if (!Msg.empty())
+        Remark << " (" << Msg << ")";
+      return Remark;
+    });
+  }
+  // Per-function summary, emitted only if something was reported above.
+  if (BBMisMatchNum)
+    ORE.emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
+                                        F.getSubprogram(), &F.getEntryBlock())
+             << "In Func " << ore::NV("Function", F.getName())
+             << ": Num_of_BB=" << ore::NV("Count", BBNum)
+             << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
+             << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
+    });
+}
+
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
@@ -1741,6 +1841,18 @@ static bool annotateAllFunctions(
Func.dumpInfo();
}
}
+
+ // Verify BlockFrequency information.
+ if (PGOVerifyBFI || PGOVerifyHotBFI) {
+ LoopInfo LI{DominatorTree(F)};
+ BranchProbabilityInfo NBPI(F, LI);
+ uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
+ if (PGOVerifyHotBFI) {
+ HotCountThreshold = PSI->getOrCompHotCountThreshold();
+ ColdCountThreshold = PSI->getOrCompColdCountThreshold();
+ }
+ verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
+ }
}
// Set function hotness attribute from the profile.
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/bfi_verification.proftext b/llvm/test/Transforms/PGOProfile/Inputs/bfi_verification.proftext
new file mode 100644
index 000000000000..dd5c2bcd57c5
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/bfi_verification.proftext
@@ -0,0 +1,16 @@
+# IR level Instrumentation Flag
+:ir
+sort_basket
+# Func Hash:
+948827210500800754
+# Num Counters:
+7
+# Counter Values:
+41017879
+31616738
+39637749
+32743703
+13338888
+6990942
+6013544
+
diff --git a/llvm/test/Transforms/PGOProfile/bfi_verification.ll b/llvm/test/Transforms/PGOProfile/bfi_verification.ll
new file mode 100644
index 000000000000..8386ebf0db74
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/bfi_verification.ll
@@ -0,0 +1,111 @@
+; Note: Verify bfi counter after loading the profile.
+; RUN: llvm-profdata merge %S/Inputs/bfi_verification.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=THRESHOLD-CHECK
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-hot-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=HOTONLY-CHECK
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.basket = type { %struct.arc*, i64, i64 }
+%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 }
+%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 }
+
+@perm = internal unnamed_addr global [351 x %struct.basket*] zeroinitializer, align 16
+
+define dso_local void @sort_basket(i64 %min, i64 %max) {
+entry:
+ %add = add nsw i64 %min, %max
+ %div = sdiv i64 %add, 2
+ %arrayidx = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %div
+ %0 = load %struct.basket*, %struct.basket** %arrayidx, align 8
+ %abs_cost = getelementptr inbounds %struct.basket, %struct.basket* %0, i64 0, i32 2
+ %1 = load i64, i64* %abs_cost, align 8
+ br label %do.body
+
+do.body:
+ %r.0 = phi i64 [ %max, %entry ], [ %r.2, %if.end ]
+ %l.0 = phi i64 [ %min, %entry ], [ %l.2, %if.end ]
+ br label %while.cond
+
+while.cond:
+ %l.1 = phi i64 [ %l.0, %do.body ], [ %inc, %while.body ]
+ %arrayidx1 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %l.1
+ %2 = load %struct.basket*, %struct.basket** %arrayidx1, align 8
+ %abs_cost2 = getelementptr inbounds %struct.basket, %struct.basket* %2, i64 0, i32 2
+ %3 = load i64, i64* %abs_cost2, align 8
+ %cmp = icmp sgt i64 %3, %1
+ br i1 %cmp, label %while.body, label %while.cond3
+
+while.body:
+ %inc = add nsw i64 %l.1, 1
+ br label %while.cond
+
+while.cond3:
+ %r.1 = phi i64 [ %r.0, %while.cond ], [ %dec, %while.body7 ]
+ %arrayidx4 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %r.1
+ %4 = load %struct.basket*, %struct.basket** %arrayidx4, align 8
+ %abs_cost5 = getelementptr inbounds %struct.basket, %struct.basket* %4, i64 0, i32 2
+ %5 = load i64, i64* %abs_cost5, align 8
+ %cmp6 = icmp sgt i64 %1, %5
+ br i1 %cmp6, label %while.body7, label %while.end8
+
+while.body7:
+ %dec = add nsw i64 %r.1, -1
+ br label %while.cond3
+
+while.end8:
+ %cmp9 = icmp slt i64 %l.1, %r.1
+ br i1 %cmp9, label %if.then, label %if.end
+
+if.then:
+ %6 = bitcast %struct.basket** %arrayidx1 to i64*
+ %7 = load i64, i64* %6, align 8
+ store %struct.basket* %4, %struct.basket** %arrayidx1, align 8
+ %8 = bitcast %struct.basket** %arrayidx4 to i64*
+ store i64 %7, i64* %8, align 8
+ br label %if.end
+
+if.end:
+ %cmp14 = icmp sgt i64 %l.1, %r.1
+ %not.cmp14 = xor i1 %cmp14, true
+ %9 = zext i1 %not.cmp14 to i64
+ %r.2 = sub i64 %r.1, %9
+ %not.cmp1457 = xor i1 %cmp14, true
+ %inc16 = zext i1 %not.cmp1457 to i64
+ %l.2 = add nsw i64 %l.1, %inc16
+ %cmp19 = icmp sgt i64 %l.2, %r.2
+ br i1 %cmp19, label %do.end, label %do.body
+
+do.end:
+ %cmp20 = icmp sgt i64 %r.2, %min
+ br i1 %cmp20, label %if.then21, label %if.end22
+
+if.then21:
+ call void @sort_basket(i64 %min, i64 %r.2)
+ br label %if.end22
+
+if.end22:
+ %cmp23 = icmp slt i64 %l.2, %max
+ %cmp24 = icmp slt i64 %l.2, 51
+ %or.cond = and i1 %cmp23, %cmp24
+ br i1 %or.cond, label %if.then25, label %if.end26
+
+if.then25:
+ call void @sort_basket(i64 %l.2, i64 %max)
+ br label %if.end26
+
+if.end26:
+ ret void
+}
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB do.body Count=39637749 BFI_Count=40801304
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.cond Count=80655628 BFI_Count=83956530
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.body Count=41017879 BFI_Count=42370585
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.cond3 Count=71254487 BFI_Count=73756204
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.body7 Count=31616738 BFI_Count=32954900
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.end8 Count=39637749 BFI_Count=40801304
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.then Count=32743703 BFI_Count=33739540
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.end Count=39637749 BFI_Count=40801304
+; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.then25 Count=6013544 BFI_Count=6277124
+; THRESHOLD-CHECK: remark: <unknown>:0:0: In Func sort_basket: Num_of_BB=14, Num_of_non_zerovalue_BB=14, Num_of_mis_matching_BB=9
+; HOTONLY-CHECK: remark: <unknown>:0:0: BB if.then25 Count=6013544 BFI_Count=6277124 (raw-Cold to BFI-Hot)
+; HOTONLY-CHECK: remark: <unknown>:0:0: In Func sort_basket: Num_of_BB=14, Num_of_non_zerovalue_BB=14, Num_of_mis_matching_BB=1
More information about the llvm-branch-commits
mailing list