[llvm] 3307240 - [InstrProfiling][ELF] Make __profd_ private if the function does not use value profiling

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 18 17:01:22 PDT 2021


Author: Fangrui Song
Date: 2021-06-18T17:01:17-07:00
New Revision: 3307240f057b856bfb01c1e42e260aa3f896c592

URL: https://github.com/llvm/llvm-project/commit/3307240f057b856bfb01c1e42e260aa3f896c592
DIFF: https://github.com/llvm/llvm-project/commit/3307240f057b856bfb01c1e42e260aa3f896c592.diff

LOG: [InstrProfiling][ELF] Make __profd_ private if the function does not use value profiling

On ELF, the D103372 optimization can apply to more cases. There are two
prerequisites for making `__profd_` private:

* `__profc_` keeps `__profd_` live under compiler/linker GC
* `__profd_` is not referenced by code

The first is satisfied because all counters/data are in a section group (either
`comdat any` or `comdat noduplicates`). The second requires that the function
does not use value profiling.

Regarding the second point: `__profd_` may be referenced by other text sections
due to inlining. There will be a linker error if a prevailing text section
references the non-prevailing local symbol.

With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`)
clang is 4.2% smaller (1-169620032/177066968).
The total object file size (`stat -c %s **/*.o | awk '{s+=$1}END{print s}'`) is 2.5% smaller.

Reviewed By: davidxl, rnk

Differential Revision: https://reviews.llvm.org/D103717

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
    llvm/test/Transforms/PGOProfile/indirect_call_profile.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 4a5511d375fd7..721f8c034438f 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -887,25 +887,22 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
   // Allocate statically the array of pointers to value profile nodes for
   // the current function.
   Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
-  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
-    uint64_t NS = 0;
-    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
-      NS += PD.NumValueSites[Kind];
-    if (NS) {
-      ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
-
-      auto *ValuesVar =
-          new GlobalVariable(*M, ValuesTy, false, Linkage,
-                             Constant::getNullValue(ValuesTy),
-                             getVarName(Inc, getInstrProfValuesVarPrefix()));
-      ValuesVar->setVisibility(Visibility);
-      ValuesVar->setSection(
-          getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
-      ValuesVar->setAlignment(Align(8));
-      MaybeSetComdat(ValuesVar);
-      ValuesPtrExpr =
-          ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
-    }
+  uint64_t NS = 0;
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    NS += PD.NumValueSites[Kind];
+  if (NS > 0 && ValueProfileStaticAlloc &&
+      !needsRuntimeRegistrationOfSectionRange(TT)) {
+    ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
+    auto *ValuesVar = new GlobalVariable(
+        *M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
+        getVarName(Inc, getInstrProfValuesVarPrefix()));
+    ValuesVar->setVisibility(Visibility);
+    ValuesVar->setSection(
+        getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
+    ValuesVar->setAlignment(Align(8));
+    MaybeSetComdat(ValuesVar);
+    ValuesPtrExpr =
+        ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
   }
 
   // Create data variable.
@@ -929,10 +926,15 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
 #include "llvm/ProfileData/InstrProfData.inc"
   };
-  // If code never references data variables (the symbol is unneeded), and
-  // linker GC cannot discard data variables while the text section is retained,
-  // data variables can be private. This optimization applies on COFF and ELF.
-  if (!DataReferencedByCode && !TT.isOSBinFormatMachO()) {
+  // If the data variable is not referenced by code (if we don't emit
+  // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
+  // data variable live under linker GC, the data variable can be private. This
+  // optimization applies to ELF.
+  //
+  // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
+  // to be false.
+  if (NS == 0 && (TT.isOSBinFormatELF() ||
+                  (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
     Linkage = GlobalValue::PrivateLinkage;
     Visibility = GlobalValue::DefaultVisibility;
   }

diff  --git a/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll b/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll
index a3c05e5b47015..050392229e8c3 100644
--- a/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll
@@ -1,22 +1,58 @@
 ; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
 ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
-; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s --check-prefix=LOWER
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -vp-static-alloc=true -S | FileCheck %s --check-prefix=LOWER
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -vp-static-alloc=false -S | FileCheck %s --check-prefix=LOWER
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-$foo3 = comdat any
+$novp_inline = comdat any
+$vp_inline = comdat any
 
 @bar = external global void ()*, align 8
+
+; GEN: @__profn_novp_inline = linkonce_odr hidden constant [11 x i8] c"novp_inline"
 ; GEN: @__profn_foo = private constant [3 x i8] c"foo"
+; GEN: @__profn_vp_inline = linkonce_odr hidden constant [9 x i8] c"vp_inline"
+
+;; Test that a linkonce function's address is recorded.
+;; We allow a linkonce profd to be private if the function does not use value profiling.
+; LOWER:      @__profd_novp_inline.[[HASH:[0-9]+]] = private global {{.*}} @__profc_novp_inline.[[HASH]]
+; LOWER-SAME:   i8* bitcast (void ()* @novp_inline to i8*)
+; LOWER:      @__profd_foo = private {{.*}} @__profc_foo
+
+;; __profd_vp_inline.[[#]] is referenced by code and may be referenced by other
+;; text sections due to inlining. It can't be local because a linker error would
+;; occur if a prevailing text section references the non-prevailing local symbol.
+; LOWER:      @__profd_vp_inline.[[FOO_HASH:[0-9]+]] = linkonce_odr hidden {{.*}} @__profc_vp_inline.[[FOO_HASH]]
+; LOWER-SAME:   i8* bitcast (void ()* @vp_inline to i8*)
+
+define linkonce_odr void @novp_inline() comdat {
+  ret void
+}
 
 define void @foo() {
 entry:
+; GEN: @foo()
+; GEN: entry:
+; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[#FOO_HASH:]], i32 1, i32 0)
+  %tmp = load void ()*, void ()** @bar, align 8
+; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
+; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[#FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
+; LOWER: call void @__llvm_profile_instrument_target(i64 %1, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
+  call void %tmp()
+  ret void
+}
+
+define linkonce_odr void @vp_inline() comdat {
+entry:
+; GEN: @vp_inline()
 ; GEN: entry:
-; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH:[0-9]+]], i32 1, i32 0)
+; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_vp_inline, i32 0, i32 0), i64 [[#FOO_HASH:]], i32 1, i32 0)
   %tmp = load void ()*, void ()** @bar, align 8
 ; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
-; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
+; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_vp_inline, i32 0, i32 0), i64 [[#FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
+; LOWER: call void @__llvm_profile_instrument_target(i64 %1, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_vp_inline.[[#]] to i8*), i32 0)
   call void %tmp()
   ret void
 }
@@ -53,13 +89,6 @@ bb11:                                             ; preds = %bb2
   resume { i8*, i32 } %tmp3
 }
 
-; Test that comdat function's address is recorded.
-; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@__profc_foo3.[[FOO3_HASH]]
-; Function Attrs: nounwind uwtable
-define linkonce_odr i32 @foo3()  comdat  {
-  ret i32 1
-}
-
 declare i32 @__gxx_personality_v0(...)
 
 ; Function Attrs: nounwind readnone


        


More information about the llvm-commits mailing list