[llvm] [PGO] Sampled instrumentation in PGO to speed up instrumentation binary (PR #69535)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 16:23:19 PDT 2024
================
@@ -635,33 +664,161 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
return PreservedAnalyses::none();
}
+//
+// Perform instrumentation sampling.
+//
+// There are 3 flavors of sampling:
+// (1) Full burst sampling: We transform:
+// Increment_Instruction;
+// to:
+// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
+// Increment_Instruction;
+// }
+// __llvm_profile_sampling__ += 1;
+// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
+// __llvm_profile_sampling__ = 0;
+// }
+//
+// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
+// counters (value-instrumentation and edge instrumentation).
+//
+// (2) Fast burst sampling:
+// The value is an unsigned type, meaning it will wrap around to zero when it
+// overflows. In this case, a second check (check2) is unnecessary, so we
+// won't generate check2 when the SampledInstrPeriod is set to 65535 (64K - 1).
+// The code after the transformation:
+// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
+// Increment_Instruction;
+// }
+// __llvm_profile_sampling__ += 1;
+//
+// (3) Simple sampling:
+// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
+// __llvm_profile_sampling__ += 1;
+// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
+// __llvm_profile_sampling__ = 0;
+// Increment_Instruction;
+// }
+//
+// Note that the code snippet after the transformation can still be counter
+// promoted. However, with sampling enabled, counter updates are expected to
+// be infrequent, making the benefits of counter promotion negligible.
+// Moreover, counter promotion can potentially cause issues in server
+// applications, particularly when the counters are dumped without a clean
+// exit. To mitigate this risk, counter promotion is disabled by default when
+// sampling is enabled. This behavior can be overridden using the internal
+// option.
+void InstrLowerer::doSampling(Instruction *I) { // I is the profiling instruction to be guarded by the sampling check
+ if (!isSamplingEnabled())
+ return;
+
+ unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();
+ unsigned SampledPeriod = SampledInstrPeriod.getValue();
+ assert(SampledBurstDuration < SampledPeriod);
+ bool UseShort = (SampledPeriod <= USHRT_MAX); // a 16-bit sampling variable is used when the period fits in an unsigned short
+ bool IsSimpleSampling = (SampledBurstDuration == 1);
+ bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535); // fast mode skips the period check (see the early return below)
+
+ auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) { // builds an i16 or i32 constant matching the sampling variable width
+ if (UseShort)
+ return Builder.getInt16(C);
+ else
+ return Builder.getInt32(C);
+ };
+
+ IntegerType *SamplingVarTy;
+ if (UseShort)
+ SamplingVarTy = Type::getInt16Ty(M.getContext());
+ else
+ SamplingVarTy = Type::getInt32Ty(M.getContext());
+ auto *SamplingVar =
+ M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
+ assert(SamplingVar && "SamplingVar not set properly");
+
+ // Create the condition for checking the burst duration.
+ Instruction *SamplingVarIncr;
+ Value *NewSamplingVarVal;
+ MDBuilder MDB(I->getContext());
+ MDNode *BranchWeight;
+ IRBuilder<> CondBuilder(I);
+ auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
+ if (IsSimpleSampling) {
+ // For simple sampling, just load the sampling variable and increment it.
+ IRBuilder<> IncBuilder(I);
+ NewSamplingVarVal =
+ IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
+ SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
+ } else {
+ // For burst sampling, create the conditional update. NOTE(review): this emits ULE (<=) while the header comment documents '<' -- confirm which is intended.
+ auto *DurationCond = CondBuilder.CreateICmpULE(
+ LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));
+ BranchWeight = MDB.createBranchWeights(
+ SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+ DurationCond, I, /* Unreachable */ false, BranchWeight);
+ IRBuilder<> IncBuilder(I);
+ NewSamplingVarVal =
+ IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
+ SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
+ I->moveBefore(ThenTerm); // the counter update now runs only when the burst condition holds
+ }
+
+ if (IsFastSampling) // no period check or reset is emitted in fast mode
+ return;
+
+ // Create the condition for checking the period.
+ Instruction *ThenTerm, *ElseTerm;
+ IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
+ auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
+ NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));
+ BranchWeight = MDB.createBranchWeights(1, SampledPeriod);
+ SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
+ &ElseTerm, BranchWeight);
+
+ // For simple sampling, the counter update is placed in the branch that resets the sampling variable.
+ if (IsSimpleSampling)
+ I->moveBefore(ThenTerm);
+
+ IRBuilder<> ResetBuilder(ThenTerm);
+ ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar); // then-branch: period reached, reset the sampling variable to 0
+ SamplingVarIncr->moveBefore(ElseTerm); // else-branch: store the incremented value only when no reset happens
+}
+
bool InstrLowerer::lowerIntrinsics(Function *F) {
bool MadeChange = false;
PromotionCandidates.clear();
+ SmallVector<InstrProfInstBase *, 8> InstrProfInsts;
+
for (BasicBlock &BB : *F) {
for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
- if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
- lowerIncrement(IPIS);
- MadeChange = true;
- } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
- lowerIncrement(IPI);
- MadeChange = true;
- } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
- lowerTimestamp(IPC);
- MadeChange = true;
- } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
- lowerCover(IPC);
- MadeChange = true;
- } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
- lowerValueProfileInst(IPVP);
- MadeChange = true;
- } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) {
- IPMP->eraseFromParent();
- MadeChange = true;
- } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) {
- lowerMCDCTestVectorBitmapUpdate(IPBU);
- MadeChange = true;
- }
+ if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
+ InstrProfInsts.push_back(IP);
----------------
xur-llvm wrote:
Hmm, this is a good question. I have had this since the first implementation. We needed it at that time because the CFG changed in the middle of the iteration. Let me see if we still need it.
https://github.com/llvm/llvm-project/pull/69535
More information about the llvm-commits
mailing list