[llvm] 3cbf441 - [SampleFDO] Do not scale the magic number NOMORE_ICP_MAGICNUM in value profile

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 29 09:34:51 PDT 2021


Author: Wei Mi
Date: 2021-03-29T09:34:37-07:00
New Revision: 3cbf44190b593210bdc78f45e5f747cb234ce5d9

URL: https://github.com/llvm/llvm-project/commit/3cbf44190b593210bdc78f45e5f747cb234ce5d9
DIFF: https://github.com/llvm/llvm-project/commit/3cbf44190b593210bdc78f45e5f747cb234ce5d9.diff

LOG: [SampleFDO] Do not scale the magic number NOMORE_ICP_MAGICNUM in value profile
during profile update.

When we inline a function and update the profile, the value profiles of the
indirect calls in the inliner and inlinee will be scaled. In
https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we started
using the magic number NOMORE_ICP_MAGICNUM (-1) to mark targets which have
already been promoted. The magic number shouldn't be scaled during the profile
update.

Although the problem has been suppressed by https://reviews.llvm.org/D98187
for SampleFDO, which stops profile update for inlining in SampleFDO, this patch
is still worthwhile, since handling the magic number properly during profile
update is more consistent.

Differential Revision: https://reviews.llvm.org/D99394

Added: 
    llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll

Modified: 
    llvm/include/llvm/IR/Metadata.h
    llvm/include/llvm/ProfileData/InstrProf.h
    llvm/lib/IR/Instructions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 6393d882a43f..d74b0800106e 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -52,6 +52,10 @@ enum LLVMConstants : uint32_t {
   DEBUG_METADATA_VERSION = 3 // Current debug info version number.
 };
 
+/// Magic number in the value profile metadata showing a target has been
+/// promoted for the instruction and shouldn't be promoted again.
+const uint64_t NOMORE_ICP_MAGICNUM = -1;
+
 /// Root of the metadata hierarchy.
 ///
 /// This is a root class for typeless data in the IR.

diff  --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 4a2cebe03f07..87e9028480f9 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -253,10 +253,6 @@ void annotateValueSite(Module &M, Instruction &Inst,
                        ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                        InstrProfValueKind ValueKind, uint32_t MaxMDCount);
 
-/// Magic number in the value profile data showing a target has been
-/// promoted for the instruction and shouldn't be promoted again.
-const uint64_t NOMORE_ICP_MAGICNUM = -1;
-
 /// Extract the value profile data from \p Inst which is annotated with
 /// value profile meta data. Return false if there is no value data annotated,
 /// otherwise  return true.

diff  --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 15567e94cb2e..8ddadf410d56 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -595,11 +595,17 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
     for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
       // The first value is the key of the value profile, which will not change.
       Vals.push_back(ProfileData->getOperand(i));
+      uint64_t Count =
+          mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+              ->getValue()
+              .getZExtValue();
+      // Don't scale the magic number.
+      if (Count == NOMORE_ICP_MAGICNUM) {
+        Vals.push_back(ProfileData->getOperand(i + 1));
+        continue;
+      }
       // Using APInt::div may be expensive, but most cases should fit 64 bits.
-      APInt Val(128,
-                mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
-                    ->getValue()
-                    .getZExtValue());
+      APInt Val(128, Count);
       Val *= APS;
       Vals.push_back(MDB.createConstant(
           ConstantInt::get(Type::getInt64Ty(getContext()),

diff  --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll
new file mode 100644
index 000000000000..5bfcce7d6e7d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-4.ll
@@ -0,0 +1,79 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@p = dso_local global void ()* null, align 8
+
+; After _Z3goov is inlined into _Z3foov, the value profiles of the indirect
+; calls in _Z3goov and _Z3foov need to be scaled. This test makes sure that
+; the magic number NOMORE_ICP_MAGICNUM, used to prevent repeated indirect
+; call promotion, is preserved during the scaling.
+;
+; CHECK-LABEL: @_Z3goov(
+; CHECK: call void %t0(), {{.*}} !prof ![[PROF_ID1:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+; CHECK-LABEL: @_Z3foov(
+; CHECK: call void %t0.i(), {{.*}} !prof ![[PROF_ID2:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+; Function Attrs: uwtable mustprogress
+define dso_local void @_Z3goov() #0 !dbg !11 !prof !23 {
+entry:
+  %t0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
+  call void %t0(), !dbg !17, !prof !22
+  ret void, !dbg !18
+}
+
+; Function Attrs: uwtable mustprogress
+define dso_local void @_Z3foov() #0 !dbg !19 {
+entry:
+  call void @_Z3goov(), !dbg !20, !prof !24
+  ret void, !dbg !21
+}
+
+attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !25}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "1.cc", directory: "")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!""}
+!8 = !DISubroutineType(types: !2)
+!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!12 = !DILocation(line: 7, column: 5, scope: !11)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"any pointer", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C++ TBAA"}
+!17 = !DILocation(line: 7, column: 3, scope: !11)
+!18 = !DILocation(line: 8, column: 1, scope: !11)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!20 = !DILocation(line: 11, column: 3, scope: !19)
+!21 = !DILocation(line: 12, column: 3, scope: !19)
+; CHECK: ![[PROF_ID1]] = !{!"VP", i32 0, i64 7200, i64 -7383239051784516332, i64 -1, i64 -3834823603621627078, i64 7200}
+; CHECK: ![[PROF_ID2]] = !{!"VP", i32 0, i64 800, i64 -7383239051784516332, i64 -1, i64 -3834823603621627078, i64 800}
+!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}
+!23 = !{!"function_entry_count", i64 1000} 
+!24 = !{!"branch_weights", i32 100}
+!25 = !{i32 1, !"ProfileSummary", !26}
+!26 = !{!27, !28, !29, !30, !31, !32, !33, !34}
+!27 = !{!"ProfileFormat", !"SampleProfile"}
+!28 = !{!"TotalCount", i64 10000}
+!29 = !{!"MaxCount", i64 1000}
+!30 = !{!"MaxInternalCount", i64 1}
+!31 = !{!"MaxFunctionCount", i64 1000}
+!32 = !{!"NumCounts", i64 3}
+!33 = !{!"NumFunctions", i64 3}
+!34 = !{!"DetailedSummary", !35}
+!35 = !{!36, !37, !38}
+!36 = !{i32 10000, i64 100, i32 1}
+!37 = !{i32 999000, i64 100, i32 1}
+!38 = !{i32 999999, i64 1, i32 2}


        


More information about the llvm-commits mailing list