[llvm] d23c1d6 - [AutoFDO] Avoid merging inlinee samples multiple times

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 31 09:30:24 PDT 2020


Author: Hongtao Yu
Date: 2020-07-31T09:30:05-07:00
New Revision: d23c1d6a8dddf0e1b9b9fa64726941e402ede8af

URL: https://github.com/llvm/llvm-project/commit/d23c1d6a8dddf0e1b9b9fa64726941e402ede8af
DIFF: https://github.com/llvm/llvm-project/commit/d23c1d6a8dddf0e1b9b9fa64726941e402ede8af.diff

LOG: [AutoFDO] Avoid merging inlinee samples multiple times

A function call can be replicated by optimizations like loop unroll and jump threading and the replicates end up sharing the sample nested callee profile. Therefore when it comes to merging samples for uninlined callees in the sample profile inliner, a callee profile can be merged multiple times which will cause an assert to fire.

This change avoids merging same callee profile for duplicate callsites by filtering out callee profiles with a non-zero head sample count.

Reviewed By: wenlei, wmi

Differential Revision: https://reviews.llvm.org/D84997

Added: 
    llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll

Modified: 
    llvm/lib/Transforms/IPO/SampleProfile.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index ca60d35b8aaf..5978f2dc6d1d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1106,16 +1106,26 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
 
     if (ProfileMergeInlinee) {
-      // Use entry samples as head samples during the merge, as inlinees
-      // don't have head samples.
-      assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
-      const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
-
-      // Note that we have to do the merge right after processing function.
-      // This allows OutlineFS's profile to be used for annotation during
-      // top-down processing of functions' annotation.
-      FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
-      OutlineFS->merge(*FS);
+      // A function call can be replicated by optimizations like callsite
+      // splitting or jump threading and the replicates end up sharing the
+      // sample nested callee profile instead of slicing the original inlinee's
+      // profile. We want to do merge exactly once by filtering out callee
+      // profiles with a non-zero head sample count.
+      if (FS->getHeadSamples() == 0) {
+        // Use entry samples as head samples during the merge, as inlinees
+        // don't have head samples.
+        const_cast<FunctionSamples *>(FS)->addHeadSamples(
+            FS->getEntrySamples());
+
+        // Note that we have to do the merge right after processing function.
+        // This allows OutlineFS's profile to be used for annotation during
+        // top-down processing of functions' annotation.
+        FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+        OutlineFS->merge(*FS);
+      } else
+        assert(FS->getHeadSamples() == FS->getEntrySamples() &&
+               "Expect same head and entry sample counts for profiles already "
+               "merged.");
     } else {
       auto pair =
           notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});

diff  --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll
new file mode 100644
index 000000000000..8d1379a58229
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll
@@ -0,0 +1,80 @@
+;; Test we merge non-inlined profile only once with '-sample-profile-merge-inlinee'
+; RUN: opt < %s -passes='function(callsite-splitting),sample-profile' -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck %s
+
+%struct.bitmap = type { i32, %struct.bitmap* }
+
+; CHECK-LABEL: @main
+define void @main(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) #0 !dbg !6 {
+entry:
+  br label %Top
+
+Top:
+  %tobool1 = icmp eq %struct.bitmap* %a_elt, null
+  br i1 %tobool1, label %CallSiteBB, label %NextCond
+
+NextCond:
+  %cmp = icmp ne %struct.bitmap* %b_elt, null
+  br i1 %cmp, label %CallSiteBB, label %End
+
+CallSiteBB:
+  %p = phi i1 [0, %Top], [%c, %NextCond]
+;; The call site is replicated by callsite-splitting pass and they end up share the same sample profile
+; CHECK: call void @_Z3sumii(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false)
+; CHECK: call void @_Z3sumii(%struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %b_elt, i1 %c)
+  call void @_Z3sumii(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p), !dbg !8
+  br label %End
+
+End:
+  ret void
+}
+
+define void @_Z3sumii(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c)  #0 !dbg !12 {
+entry:
+  %tobool = icmp ne %struct.bitmap* %a_elt, null
+  %tobool1 = icmp ne %struct.bitmap* %b_elt, null
+  %or.cond = and i1 %tobool, %tobool1, !dbg !13
+  br i1 %or.cond, label %Cond, label %Big
+
+Cond:
+  %cmp = icmp eq %struct.bitmap*  %dst_elt, %a_elt, !dbg !14
+  br i1 %cmp, label %Small, label %Big, !dbg !15
+
+Small:
+  br label %End
+
+Big:
+  br label %End
+
+End:
+  ret void
+}
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 10, scope: !9)
+!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
+!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10)
+!11 = !DILocation(line: 12, scope: !6)
+!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 4, scope: !12)
+!14 = !DILocation(line: 5, scope: !12)
+!15 = !DILocation(line: 6, scope: !12)
+
+
+;; Check the profile of funciton sum is only merged once though the original callsite is replicted.
+; CHECK: name: "sum"
+; CHECK-NEXT: {!"function_entry_count", i64 46}
+; CHECK: !{!"branch_weights", i32 11, i32 37}
+; CHECK: !{!"branch_weights", i32 11, i32 1}


        


More information about the llvm-commits mailing list