r309195 - Add branch weights to branches for static initializers.

Richard Smith via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 26 15:01:09 PDT 2017


Author: rsmith
Date: Wed Jul 26 15:01:09 2017
New Revision: 309195

URL: http://llvm.org/viewvc/llvm-project?rev=309195&view=rev
Log:
Add branch weights to branches for static initializers.

The initializer for a static local variable cannot be hot, because it runs at
most once per program. That's not quite the same thing as having a low branch
probability, but under the assumption that the function is invoked many times,
modeling this as a branch probability seems reasonable.

For TLS variables, the situation is less clear, since the initialization side
of the branch can run multiple times in a program execution, but we still
expect initialization to be rare relative to non-initialization uses. It would
seem worthwhile to add a PGO counter along this path to make this estimation
more accurate in future.

For globals with guarded initialization, we don't yet apply any branch weights.
Due to our use of COMDATs, the guard will be reached exactly once per DSO, but
we have no idea how many DSOs will define the variable.

Added:
    cfe/trunk/test/CodeGenCXX/static-initializer-branch-weights.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
    cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
    cfe/trunk/test/CodeGenCXX/microsoft-abi-static-initializers.cpp
    cfe/trunk/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp
    cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp

Modified: cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDeclCXX.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDeclCXX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDeclCXX.cpp Wed Jul 26 15:01:09 2017
@@ -18,6 +18,7 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/Path.h"
 
 using namespace clang;
@@ -259,6 +260,43 @@ void CodeGenFunction::EmitCXXGuardedInit
   CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit);
 }
 
+void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
+                                               llvm::BasicBlock *InitBlock,
+                                               llvm::BasicBlock *NoInitBlock,
+                                               GuardKind Kind,
+                                               const VarDecl *D) {
+  assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable");
+
+  // A guess at how many times we will enter the initialization of a
+  // variable, depending on the kind of variable.
+  static const uint64_t InitsPerTLSVar = 1024;
+  static const uint64_t InitsPerLocalVar = 1024 * 1024;
+
+  llvm::MDNode *Weights;
+  if (Kind == GuardKind::VariableGuard && !D->isLocalVarDecl()) {
+    // For non-local variables, don't apply any weighting for now. Due to our
+    // use of COMDATs, we expect there to be at most one initialization of the
+    // variable per DSO, but we have no way to know how many DSOs will try to
+    // initialize the variable.
+    Weights = nullptr;
+  } else {
+    uint64_t NumInits;
+    // FIXME: For the TLS case, collect and use profiling information to
+    // determine a more accurate branch weight.
+    if (Kind == GuardKind::TlsGuard || D->getTLSKind())
+      NumInits = InitsPerTLSVar;
+    else
+      NumInits = InitsPerLocalVar;
+
+    // The probability of us entering the initializer is
+    //   1 / (total number of times we attempt to initialize the variable).
+    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+    Weights = MDHelper.createBranchWeights(1, NumInits - 1);
+  }
+
+  Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights);
+}
+
 llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
     llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
     SourceLocation Loc, bool TLS) {
@@ -539,7 +577,8 @@ CodeGenFunction::GenerateCXXGlobalInitFu
                                                  "guard.uninitialized");
       llvm::BasicBlock *InitBlock = createBasicBlock("init");
       ExitBlock = createBasicBlock("exit");
-      Builder.CreateCondBr(Uninit, InitBlock, ExitBlock);
+      EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock,
+                               GuardKind::TlsGuard, nullptr);
       EmitBlock(InitBlock);
       // Mark as initialized before initializing anything else. If the
       // initializers use previously-initialized thread_local vars, that's

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Wed Jul 26 15:01:09 2017
@@ -3496,6 +3496,14 @@ public:
   void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr,
                           bool PerformInit);
 
+  enum class GuardKind { VariableGuard, TlsGuard };
+
+  /// Emit a branch to select whether or not to perform guarded initialization.
+  void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
+                                llvm::BasicBlock *InitBlock,
+                                llvm::BasicBlock *NoInitBlock,
+                                GuardKind Kind, const VarDecl *D);
+
   /// GenerateCXXGlobalInitFunc - Generates code for initializing global
   /// variables.
   void GenerateCXXGlobalInitFunc(llvm::Function *Fn,

Modified: cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp (original)
+++ cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp Wed Jul 26 15:01:09 2017
@@ -2113,13 +2113,14 @@ void ItaniumCXXABI::EmitGuardedInit(Code
       (UseARMGuardVarABI && !useInt8GuardVariable)
           ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
           : LI;
-  llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized");
+  llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
 
   llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
 
   // Check if the first byte of the guard variable is zero.
-  Builder.CreateCondBr(isInitialized, InitCheckBlock, EndBlock);
+  CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
+                               CodeGenFunction::GuardKind::VariableGuard, &D);
 
   CGF.EmitBlock(InitCheckBlock);
 

Modified: cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp (original)
+++ cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp Wed Jul 26 15:01:09 2017
@@ -2463,11 +2463,12 @@ void MicrosoftCXXABI::EmitGuardedInit(Co
     // Test our bit from the guard variable.
     llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum);
     llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr);
-    llvm::Value *IsInitialized =
-        Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
+    llvm::Value *NeedsInit =
+        Builder.CreateICmpEQ(Builder.CreateAnd(LI, Bit), Zero);
     llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
     llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
-    Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
+    CGF.EmitCXXGuardedInitBranch(NeedsInit, InitBlock, EndBlock,
+                                 CodeGenFunction::GuardKind::VariableGuard, &D);
 
     // Set our bit in the guard variable and emit the initializer and add a global
     // destructor if appropriate.
@@ -2502,7 +2503,8 @@ void MicrosoftCXXABI::EmitGuardedInit(Co
         Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch);
     llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt");
     llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
-    Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock);
+    CGF.EmitCXXGuardedInitBranch(IsUninitialized, AttemptInitBlock, EndBlock,
+                                 CodeGenFunction::GuardKind::VariableGuard, &D);
 
     // This BasicBlock attempts to determine whether or not this thread is
     // responsible for doing the initialization.

Modified: cfe/trunk/test/CodeGenCXX/microsoft-abi-static-initializers.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/microsoft-abi-static-initializers.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/microsoft-abi-static-initializers.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/microsoft-abi-static-initializers.cpp Wed Jul 26 15:01:09 2017
@@ -146,7 +146,7 @@ inline S &getS() {
 // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.S* @"\01?getS@@YAAAUS@@XZ"() {{.*}} comdat
 // CHECK: load i32, i32* @"\01??_B?1??getS@@YAAAUS@@XZ@51"
 // CHECK: and i32 {{.*}}, 1
-// CHECK: icmp ne i32 {{.*}}, 0
+// CHECK: icmp eq i32 {{.*}}, 0
 // CHECK: br i1
 //   init:
 // CHECK: or i32 {{.*}}, 1

Modified: cfe/trunk/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp Wed Jul 26 15:01:09 2017
@@ -24,8 +24,8 @@ extern inline S &f() {
   static thread_local S s;
 // CHECK:       %[[guard:.*]] = load i32, i32* @"\01??__J?1??f@@YAAAUS@@XZ@51"
 // CHECK-NEXT:  %[[mask:.*]] = and i32 %[[guard]], 1
-// CHECK-NEXT:  %[[cmp:.*]] = icmp ne i32 %[[mask]], 0
-// CHECK-NEXT:  br i1 %[[cmp]], label %[[init_end:.*]], label %[[init:.*]]
+// CHECK-NEXT:  %[[cmp:.*]] = icmp eq i32 %[[mask]], 0
+// CHECK-NEXT:  br i1 %[[cmp]], label %[[init:.*]], label %[[init_end:.*]], !prof ![[unlikely_threadlocal:.*]]
 //
 // CHECK:     [[init]]:
 // CHECK-NEXT:  %[[or:.*]] = or i32 %[[guard]], 1
@@ -56,7 +56,7 @@ extern inline S &g() {
 // CHECK:  %[[guard:.*]] = load atomic i32, i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA" unordered, align 4
 // CHECK-NEXT:  %[[epoch:.*]] = load i32, i32* @_Init_thread_epoch
 // CHECK-NEXT:  %[[cmp:.*]] = icmp sgt i32 %[[guard]], %[[epoch]]
-// CHECK-NEXT:  br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]]
+// CHECK-NEXT:  br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]], !prof ![[unlikely_staticlocal:.*]]
 //
 // CHECK:     [[init_attempt]]:
 // CHECK-NEXT:  call void @_Init_thread_header(i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA")
@@ -95,3 +95,6 @@ int g1() {
   static int i = f1();
   return i;
 }
+
+// CHECK-DAG: ![[unlikely_threadlocal]] = !{!"branch_weights", i32 1, i32 1023}
+// CHECK-DAG: ![[unlikely_staticlocal]] = !{!"branch_weights", i32 1, i32 1048575}

Modified: cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp?rev=309195&r1=309194&r2=309195&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp Wed Jul 26 15:01:09 2017
@@ -20,7 +20,7 @@ void g() {
 // WEBASSEMBLY32:       %[[R0:.+]] = load atomic i8, i8* bitcast (i32* @_ZGVZ1gvE1a to i8*) acquire, align 4
 // WEBASSEMBLY32-NEXT:  %[[R1:.+]] = and i8 %[[R0]], 1
 // WEBASSEMBLY32-NEXT:  %[[R2:.+]] = icmp eq i8 %[[R1]], 0
-// WEBASSEMBLY32-NEXT:  br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]]
+// WEBASSEMBLY32-NEXT:  br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]],
 // WEBASSEMBLY32:       [[CHECK]]
 // WEBASSEMBLY32:       call i32 @__cxa_guard_acquire
 // WEBASSEMBLY32:       [[END]]
@@ -30,7 +30,7 @@ void g() {
 // WEBASSEMBLY64:       %[[R0:.+]] = load atomic i8, i8* bitcast (i64* @_ZGVZ1gvE1a to i8*) acquire, align 8
 // WEBASSEMBLY64-NEXT:  %[[R1:.+]] = and i8 %[[R0]], 1
 // WEBASSEMBLY64-NEXT:  %[[R2:.+]] = icmp eq i8 %[[R1]], 0
-// WEBASSEMBLY64-NEXT:  br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]]
+// WEBASSEMBLY64-NEXT:  br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]],
 // WEBASSEMBLY64:       [[CHECK]]
 // WEBASSEMBLY64:       call i32 @__cxa_guard_acquire
 // WEBASSEMBLY64:       [[END]]

Added: cfe/trunk/test/CodeGenCXX/static-initializer-branch-weights.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/static-initializer-branch-weights.cpp?rev=309195&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/static-initializer-branch-weights.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/static-initializer-branch-weights.cpp Wed Jul 26 15:01:09 2017
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -emit-llvm -std=c++1z %s -o - -triple=x86_64-linux-gnu | FileCheck %s
+
+struct S { S(); ~S(); };
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @global)
+S global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// FIXME: Do we really need thread-safe initialization here? We don't run
+// global ctors on multiple threads. (If we were to do so, we'd need thread-safe
+// init for B<int>::member and B<int>::inline_member too.)
+// CHECK: load atomic i8, i8* bitcast (i64* @_ZGV13inline_global to i8*) acquire,
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1
+// CHECK-NOT: !prof
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @inline_global)
+inline S inline_global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_global)
+thread_local S thread_local_global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK: load i8, i8* bitcast (i64* @_ZGV26thread_local_inline_global to i8*)
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1
+// CHECK-NOT: !prof
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_inline_global)
+thread_local inline S thread_local_inline_global;
+
+struct A {
+  static S member;
+  static thread_local S thread_local_member;
+
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVN1A13inline_memberE to i8*) acquire,
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A13inline_memberE)
+  static inline S inline_member;
+
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load i8, i8* bitcast (i64* @_ZGVN1A26thread_local_inline_memberE to i8*)
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A26thread_local_inline_memberE)
+  static thread_local inline S thread_local_inline_member;
+};
+
+// CHECK-LABEL: define void @_Z1fv()
+void f() {
+  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVZ1fvE12static_local to i8*) acquire,
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_LOCAL:[0-9]*]]
+  static S static_local;
+
+  // CHECK: load i8, i8* @_ZGVZ1fvE19static_thread_local,
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL:[0-9]*]]
+  static thread_local S static_thread_local;
+}
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A6memberE)
+S A::member;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A19thread_local_memberE)
+thread_local S A::thread_local_member;
+
+template <typename T> struct B {
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE6memberE to i8*)
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE6memberE)
+  static S member;
+
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE13inline_memberE to i8*)
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE13inline_memberE)
+  static inline S inline_member;
+
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE19thread_local_memberE to i8*)
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE19thread_local_memberE)
+  static thread_local S thread_local_member;
+
+  // CHECK-LABEL: define {{.*}}global_var_init
+  // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE26thread_local_inline_memberE to i8*)
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK-NOT: !prof
+  // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE26thread_local_inline_memberE)
+  static thread_local inline S thread_local_inline_member;
+};
+template<typename T> S B<T>::member;
+template<typename T> thread_local S B<T>::thread_local_member;
+
+template<typename ...T> void use(T &...);
+void use_b() {
+  use(B<int>::member, B<int>::inline_member, B<int>::thread_local_member,
+      B<int>::thread_local_inline_member);
+}
+
+// CHECK-LABEL: define {{.*}}tls_init()
+// CHECK: load i8, i8* @__tls_guard, align 1
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]]
+
+// CHECK-DAG: ![[WEIGHTS_THREAD_LOCAL]] = !{!"branch_weights", i32 1, i32 1023}
+// CHECK-DAG: ![[WEIGHTS_LOCAL]] = !{!"branch_weights", i32 1, i32 1048575}




More information about the cfe-commits mailing list