r221663 - [OPENMP] Codegen for threadprivate variables

Alexey Bataev a.bataev at hotmail.com
Mon Nov 10 20:05:40 PST 2014


Author: abataev
Date: Mon Nov 10 22:05:39 2014
New Revision: 221663

URL: http://llvm.org/viewvc/llvm-project?rev=221663&view=rev
Log:
[OPENMP] Codegen for threadprivate variables
For all threadprivate variables which have constructor/destructor emit call to void __kmpc_threadprivate_register(ident_t * <Current Location>, void *<Original Global Addr>, kmpc_ctor <Constructor>, kmpc_cctor NULL, kmpc_dtor <Destructor>); 
In expressions all references to such variables are replaced by calls to void *__kmpc_threadprivate_cached(ident_t *<Current Location>, kmp_int32 <Current Thread Id>, void *<Original Global Addr>, size_t <Size of Data>, void ***<Pointer to autogenerated cache – array of private copies of threadprivate variable>);
Test test/OpenMP/threadprivate_codegen.cpp checks that codegen is correct. Also it checks that codegen is correct after serialization/deserialization and one of passes verifies debug info.
Differential Revision: http://reviews.llvm.org/D4002

Added:
    cfe/trunk/test/OpenMP/threadprivate_codegen.cpp   (with props)
    cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp   (with props)
Modified:
    cfe/trunk/include/clang/AST/ASTMutationListener.h
    cfe/trunk/include/clang/AST/DeclOpenMP.h
    cfe/trunk/include/clang/Basic/Attr.td
    cfe/trunk/include/clang/Serialization/ASTWriter.h
    cfe/trunk/lib/AST/ASTContext.cpp
    cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
    cfe/trunk/lib/CodeGen/CGExpr.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/lib/CodeGen/CodeGenModule.h
    cfe/trunk/lib/Frontend/MultiplexConsumer.cpp
    cfe/trunk/lib/Parse/Parser.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp
    cfe/trunk/lib/Serialization/ASTCommon.h
    cfe/trunk/lib/Serialization/ASTReaderDecl.cpp
    cfe/trunk/lib/Serialization/ASTWriter.cpp

Modified: cfe/trunk/include/clang/AST/ASTMutationListener.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTMutationListener.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/ASTMutationListener.h (original)
+++ cfe/trunk/include/clang/AST/ASTMutationListener.h Mon Nov 10 22:05:39 2014
@@ -102,6 +102,12 @@ public:
   /// \param D the declaration marked used
   virtual void DeclarationMarkedUsed(const Decl *D) {}
 
+  /// \brief A declaration is marked as OpenMP threadprivate which was not
+  /// previously marked as threadprivate.
+  ///
+  /// \param D the declaration marked OpenMP threadprivate.
+  virtual void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {}
+
   // NOTE: If new methods are added they should also be added to
   // MultiplexASTMutationListener.
 };

Modified: cfe/trunk/include/clang/AST/DeclOpenMP.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/DeclOpenMP.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/DeclOpenMP.h (original)
+++ cfe/trunk/include/clang/AST/DeclOpenMP.h Mon Nov 10 22:05:39 2014
@@ -19,6 +19,7 @@
 #include "llvm/ADT/ArrayRef.h"
 
 namespace clang {
+class Expr;
 
 /// \brief This represents '#pragma omp threadprivate ...' directive.
 /// For example, in the following, both 'a' and 'A::b' are threadprivate:

Modified: cfe/trunk/include/clang/Basic/Attr.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Attr.td (original)
+++ cfe/trunk/include/clang/Basic/Attr.td Mon Nov 10 22:05:39 2014
@@ -1926,3 +1926,9 @@ def CapturedRecord : InheritableAttr {
   let Documentation = [Undocumented];
 }
 
+def OMPThreadPrivateDecl : InheritableAttr {
+  // This attribute has no spellings as it is only ever created implicitly.
+  let Spellings = [];
+  let SemaHandler = 0;
+  let Documentation = [Undocumented];
+}

Modified: cfe/trunk/include/clang/Serialization/ASTWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTWriter.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTWriter.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTWriter.h Mon Nov 10 22:05:39 2014
@@ -798,6 +798,7 @@ public:
                                     const ObjCPropertyDecl *OrigProp,
                                     const ObjCCategoryDecl *ClassExt) override;
   void DeclarationMarkedUsed(const Decl *D) override;
+  void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
 };
 
 /// \brief AST and semantic-analysis consumer that generates a

Modified: cfe/trunk/lib/AST/ASTContext.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/AST/ASTContext.cpp (original)
+++ cfe/trunk/lib/AST/ASTContext.cpp Mon Nov 10 22:05:39 2014
@@ -7904,7 +7904,9 @@ bool ASTContext::DeclMustBeEmitted(const
     // We never need to emit an uninstantiated function template.
     if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
       return false;
-  } else
+  } else if (isa<OMPThreadPrivateDecl>(D))
+    return true;
+  else
     return false;
 
   // If this is a member of a class template, we do not need to emit it.

Modified: cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDeclCXX.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDeclCXX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDeclCXX.cpp Mon Nov 10 22:05:39 2014
@@ -14,6 +14,7 @@
 #include "CodeGenFunction.h"
 #include "CGCXXABI.h"
 #include "CGObjCRuntime.h"
+#include "CGOpenMPRuntime.h"
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Intrinsics.h"
@@ -139,6 +140,10 @@ void CodeGenFunction::EmitCXXGlobalVarDe
   QualType T = D.getType();
 
   if (!T->isReferenceType()) {
+    if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
+      (void)CGM.getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition(
+          &D, DeclPtr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(),
+          PerformInit, this);
     if (PerformInit)
       EmitDeclInit(*this, D, DeclPtr);
     if (CGM.isTypeConstant(D.getType(), true))

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Mon Nov 10 22:05:39 2014
@@ -16,6 +16,7 @@
 #include "CGCall.h"
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
+#include "CGOpenMPRuntime.h"
 #include "CGRecordLayout.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
@@ -1802,6 +1803,14 @@ EmitBitCastOfLValueToProperType(CodeGenF
   return CGF.Builder.CreateBitCast(V, IRType->getPointerTo(AS), Name);
 }
 
+static LValue EmitThreadPrivateVarDeclLValue(
+    CodeGenFunction &CGF, const VarDecl *VD, QualType T, llvm::Value *V,
+    llvm::Type *RealVarTy, CharUnits Alignment, SourceLocation Loc) {
+  V = CGF.CGM.getOpenMPRuntime().getOMPAddrOfThreadPrivate(CGF, VD, V, Loc);
+  V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
+  return CGF.MakeAddrLValue(V, T, Alignment);
+}
+
 static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
                                       const Expr *E, const VarDecl *VD) {
   QualType T = E->getType();
@@ -1816,6 +1825,11 @@ static LValue EmitGlobalVarDeclLValue(Co
   V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
   CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
   LValue LV;
+  // Emit reference to the private copy of the variable if it is an OpenMP
+  // threadprivate variable.
+  if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>())
+    return EmitThreadPrivateVarDeclLValue(CGF, VD, T, V, RealVarTy, Alignment,
+                                          E->getExprLoc());
   if (VD->getType()->isReferenceType()) {
     llvm::LoadInst *LI = CGF.Builder.CreateLoad(V);
     LI->setAlignment(Alignment.getQuantity());
@@ -1929,6 +1943,12 @@ LValue CodeGenFunction::EmitDeclRefLValu
       V = CGM.getOrCreateStaticVarDecl(
           *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false));
 
+    // Check if variable is threadprivate.
+    if (V && getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>())
+      return EmitThreadPrivateVarDeclLValue(
+          *this, VD, T, V, getTypes().ConvertTypeForMem(VD->getType()),
+          Alignment, E->getExprLoc());
+
     // Use special handling for lambdas.
     if (!V) {
       if (FieldDecl *FD = LambdaCaptureFields.lookup(VD)) {

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Nov 10 22:05:39 2014
@@ -271,6 +271,17 @@ CGOpenMPRuntime::CreateRuntimeFunction(O
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
     break;
   }
+  case OMPRTL__kmpc_threadprivate_cached: {
+    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
+    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.VoidPtrTy, CGM.SizeTy,
+                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
+    break;
+  }
   case OMPRTL__kmpc_critical: {
     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
     // kmp_critical_name *crit);
@@ -282,6 +293,29 @@ CGOpenMPRuntime::CreateRuntimeFunction(O
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
     break;
   }
+  case OMPRTL__kmpc_threadprivate_register: {
+    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
+    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
+    // typedef void *(*kmpc_ctor)(void *);
+    auto KmpcCtorTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
+                                /*isVarArg*/ false)->getPointerTo();
+    // typedef void *(*kmpc_cctor)(void *, void *);
+    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+    auto KmpcCopyCtorTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
+                                /*isVarArg*/ false)->getPointerTo();
+    // typedef void (*kmpc_dtor)(void *);
+    auto KmpcDtorTy =
+        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
+            ->getPointerTo();
+    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
+                              KmpcCopyCtorTy, KmpcDtorTy};
+    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
+                                        /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
+    break;
+  }
   case OMPRTL__kmpc_end_critical: {
     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
     // kmp_critical_name *crit);
@@ -333,6 +367,157 @@ CGOpenMPRuntime::CreateRuntimeFunction(O
   return RTLFn;
 }
 
+llvm::Constant *
+CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
+  // Lookup the entry, lazily creating it if necessary.
+  return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy,
+                                     Twine(CGM.getMangledName(VD)) + ".cache.");
+}
+
+llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
+                                                        const VarDecl *VD,
+                                                        llvm::Value *VDAddr,
+                                                        SourceLocation Loc) {
+  auto VarTy = VDAddr->getType()->getPointerElementType();
+  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
+                         GetOpenMPThreadID(CGF, Loc),
+                         CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
+                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
+                         getOrCreateThreadPrivateCache(VD)};
+  return CGF.EmitRuntimeCall(
+      CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
+}
+
+void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit(
+    CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
+    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
+  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
+  // library.
+  auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc);
+  CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+                      OMPLoc);
+  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
+  // to register constructor/destructor for variable.
+  llvm::Value *Args[] = {OMPLoc,
+                         CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
+                         Ctor, CopyCtor, Dtor};
+  CGF.EmitRuntimeCall(CreateRuntimeFunction(
+                          CGOpenMPRuntime::OMPRTL__kmpc_threadprivate_register),
+                      Args);
+}
+
+llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition(
+    const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
+    bool PerformInit, CodeGenFunction *CGF) {
+  VD = VD->getDefinition(CGM.getContext());
+  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
+    ThreadPrivateWithDefinition.insert(VD);
+    QualType ASTTy = VD->getType();
+
+    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
+    auto Init = VD->getAnyInitializer();
+    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
+      // Generate function that re-emits the declaration's initializer into the
+      // threadprivate copy of the variable VD
+      CodeGenFunction CtorCGF(CGM);
+      FunctionArgList Args;
+      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
+                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+      Args.push_back(&Dst);
+
+      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
+          /*isVariadic=*/false);
+      auto FTy = CGM.getTypes().GetFunctionType(FI);
+      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
+          FTy, ".__kmpc_global_ctor_.", Loc);
+      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
+                            Args, SourceLocation());
+      auto ArgVal = CtorCGF.EmitLoadOfScalar(
+          CtorCGF.GetAddrOfLocalVar(&Dst),
+          /*Volatile=*/false, CGM.PointerAlignInBytes,
+          CGM.getContext().VoidPtrTy, Dst.getLocation());
+      auto Arg = CtorCGF.Builder.CreatePointerCast(
+          ArgVal,
+          CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
+      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
+                               /*IsInitializer=*/true);
+      ArgVal = CtorCGF.EmitLoadOfScalar(
+          CtorCGF.GetAddrOfLocalVar(&Dst),
+          /*Volatile=*/false, CGM.PointerAlignInBytes,
+          CGM.getContext().VoidPtrTy, Dst.getLocation());
+      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
+      CtorCGF.FinishFunction();
+      Ctor = Fn;
+    }
+    if (VD->getType().isDestructedType() != QualType::DK_none) {
+      // Generate function that emits destructor call for the threadprivate copy
+      // of the variable VD
+      CodeGenFunction DtorCGF(CGM);
+      FunctionArgList Args;
+      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
+                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+      Args.push_back(&Dst);
+
+      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
+          /*isVariadic=*/false);
+      auto FTy = CGM.getTypes().GetFunctionType(FI);
+      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
+          FTy, ".__kmpc_global_dtor_.", Loc);
+      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
+                            SourceLocation());
+      auto ArgVal = DtorCGF.EmitLoadOfScalar(
+          DtorCGF.GetAddrOfLocalVar(&Dst),
+          /*Volatile=*/false, CGM.PointerAlignInBytes,
+          CGM.getContext().VoidPtrTy, Dst.getLocation());
+      DtorCGF.emitDestroy(ArgVal, ASTTy,
+                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
+                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
+      DtorCGF.FinishFunction();
+      Dtor = Fn;
+    }
+    // Do not emit init function if it is not required.
+    if (!Ctor && !Dtor)
+      return nullptr;
+
+    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+    auto CopyCtorTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
+                                /*isVarArg=*/false)->getPointerTo();
+    // Copying constructor for the threadprivate variable.
+    // Must be NULL - reserved by runtime, but currently it requires that this
+    // parameter is always NULL. Otherwise it fires assertion.
+    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
+    if (Ctor == nullptr) {
+      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
+                                            /*isVarArg=*/false)->getPointerTo();
+      Ctor = llvm::Constant::getNullValue(CtorTy);
+    }
+    if (Dtor == nullptr) {
+      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
+                                            /*isVarArg=*/false)->getPointerTo();
+      Dtor = llvm::Constant::getNullValue(DtorTy);
+    }
+    if (!CGF) {
+      auto InitFunctionTy =
+          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
+      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
+          InitFunctionTy, ".__omp_threadprivate_init_.");
+      CodeGenFunction InitCGF(CGM);
+      FunctionArgList ArgList;
+      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
+                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
+                            Loc);
+      EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
+      InitCGF.FinishFunction();
+      return InitFunction;
+    }
+    EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
+  }
+  return nullptr;
+}
+
 void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Value *OutlinedFn,
@@ -398,21 +583,31 @@ llvm::Value *CGOpenMPRuntime::EmitThread
   return ThreadIDTemp;
 }
 
-llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) {
+llvm::Constant *
+CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty,
+                                             const llvm::Twine &Name) {
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
-  Out << ".gomp_critical_user_" << CriticalName << ".var";
-  auto RuntimeCriticalName = Out.str();
-  auto &Elem = CriticalRegionVarNames.GetOrCreateValue(RuntimeCriticalName);
-  if (Elem.getValue() != nullptr)
-    return Elem.getValue();
+  Out << Name;
+  auto RuntimeName = Out.str();
+  auto &Elem = InternalVars.GetOrCreateValue(RuntimeName);
+  if (Elem.getValue()) {
+    assert(Elem.getValue()->getType()->getPointerElementType() == Ty &&
+           "OMP internal variable has different type than requested");
+    return &*Elem.getValue();
+  }
 
-  auto Lock = new llvm::GlobalVariable(
-      CGM.getModule(), KmpCriticalNameTy, /*IsConstant*/ false,
+  auto Item = new llvm::GlobalVariable(
+      CGM.getModule(), Ty, /*IsConstant*/ false,
       llvm::GlobalValue::CommonLinkage,
-      llvm::Constant::getNullValue(KmpCriticalNameTy), Elem.getKey());
-  Elem.setValue(Lock);
-  return Lock;
+      llvm::Constant::getNullValue(Ty), Elem.getKey());
+  Elem.setValue(Item);
+  return Item;
+}
+
+llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) {
+  llvm::Twine Name(".gomp_critical_user_", CriticalName);
+  return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
 }
 
 void CGOpenMPRuntime::EmitOMPCriticalRegionStart(CodeGenFunction &CGF,

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Nov 10 22:05:39 2014
@@ -16,19 +16,23 @@
 
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/IR/ValueHandle.h"
 
 namespace llvm {
 class ArrayType;
 class Constant;
 class Function;
 class FunctionType;
+class GlobalVariable;
 class StructType;
 class Type;
 class Value;
 } // namespace llvm
 
 namespace clang {
+class VarDecl;
 
 class OMPExecutableDirective;
 class VarDecl;
@@ -62,9 +66,15 @@ public:
     OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
   };
   enum OpenMPRTLFunction {
-    // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
-    // microtask, ...);
+    /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+    /// kmpc_micro microtask, ...);
     OMPRTL__kmpc_fork_call,
+    /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
+    /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
+    OMPRTL__kmpc_threadprivate_cached,
+    /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
+    /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
+    OMPRTL__kmpc_threadprivate_register,
     // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
     OMPRTL__kmpc_global_thread_num,
     // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
@@ -155,8 +165,13 @@ private:
   /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
   /// kmp_critical_name[8];
   llvm::ArrayType *KmpCriticalNameTy;
-  /// \brief Map of critical regions names and the corresponding lock objects.
-  llvm::StringMap<llvm::Value *, llvm::BumpPtrAllocator> CriticalRegionVarNames;
+  /// \brief An ordered map of auto-generated variables to their unique names.
+  /// It stores variables with the following names: 1) ".gomp_critical_user_" +
+  /// <critical_section_name> + ".var" for "omp critical" directives; 2)
+  /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
+  /// variables.
+  llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
+      InternalVars;
 
   /// \brief Emits object of ident_t type with info for source location.
   /// \param Flags Flags for OpenMP location.
@@ -176,6 +191,13 @@ private:
   /// \return Specified function.
   llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
 
+  /// \brief If the specified mangled name is not in the module, create and
+  /// return threadprivate cache object. This object is a pointer's worth of
+  /// storage that's reserved for use by the OpenMP runtime.
+  /// \param D Threadprivate variable.
+  /// \return Cache variable for the specified threadprivate.
+  llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
+
   /// \brief Emits address of the word in a memory where current thread id is
   /// stored.
   virtual llvm::Value *EmitThreadIDAddress(CodeGenFunction &CGF,
@@ -185,6 +207,28 @@ private:
   ///
   llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc);
 
+  /// \brief Gets (if variable with the given name already exist) or creates
+  /// internal global variable with the specified Name. The created variable has
+  /// linkage CommonLinkage by default and is initialized by null value.
+  /// \param Ty Type of the global variable. If it is exist already the type
+  /// must be the same.
+  /// \param Name Name of the variable.
+  llvm::Constant *GetOrCreateInternalVariable(llvm::Type *Ty,
+                                              const llvm::Twine &Name);
+
+  /// \brief Set of threadprivate variables with the generated initializer.
+  llvm::DenseSet<const VarDecl *> ThreadPrivateWithDefinition;
+
+  /// \brief Emits initialization code for the threadprivate variables.
+  /// \param VDAddr Address of the global variable \a VD.
+  /// \param Ctor Pointer to a global init function for \a VD.
+  /// \param CopyCtor Pointer to a global copy function for \a VD.
+  /// \param Dtor Pointer to a global destructor function for \a VD.
+  /// \param Loc Location of threadprivate declaration.
+  void EmitOMPThreadPrivateVarInit(CodeGenFunction &CGF, llvm::Value *VDAddr,
+                                   llvm::Value *Ctor, llvm::Value *CopyCtor,
+                                   llvm::Value *Dtor, SourceLocation Loc);
+
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
@@ -261,6 +305,30 @@ public:
   virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
                                        llvm::Value *NumThreads,
                                        SourceLocation Loc);
+
+  /// \brief Returns address of the threadprivate variable for the current
+  /// thread.
+  /// \param D Threadprivate variable.
+  /// \param VDAddr Address of the global variable \a VD.
+  /// \param Loc Location of the reference to threadprivate var.
+  /// \return Address of the threadprivate variable for the current thread.
+  virtual llvm::Value *getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
+                                                 const VarDecl *VD,
+                                                 llvm::Value *VDAddr,
+                                                 SourceLocation Loc);
+
+  /// \brief Emit a code for initialization of threadprivate variable. It emits
+  /// a call to runtime library which adds initial value to the newly created
+  /// threadprivate variable (if it is not constant) and registers destructor
+  /// for the variable (if any).
+  /// \param VD Threadprivate variable.
+  /// \param VDAddr Address of the global variable \a VD.
+  /// \param Loc Location of threadprivate declaration.
+  /// \param PerformInit true if initialization expression is not constant.
+  virtual llvm::Function *
+  EmitOMPThreadPrivateVarDefinition(const VarDecl *VD, llvm::Value *VDAddr,
+                                    SourceLocation Loc, bool PerformInit,
+                                    CodeGenFunction *CGF = nullptr);
 };
 } // namespace CodeGen
 } // namespace clang

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Nov 10 22:05:39 2014
@@ -3230,6 +3230,10 @@ void CodeGenModule::EmitTopLevelDecl(Dec
     break;
   }
 
+  case Decl::OMPThreadPrivate:
+    EmitOMPThreadPrivateDecl(cast<OMPThreadPrivateDecl>(D));
+    break;
+
   case Decl::ClassTemplateSpecialization: {
     const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
     if (DebugInfo &&
@@ -3506,3 +3510,18 @@ llvm::Constant *CodeGenModule::GetAddrOf
   return getCXXABI().getAddrOfRTTIDescriptor(Ty);
 }
 
+void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
+  for (auto RefExpr : D->varlists()) {
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(RefExpr)->getDecl());
+    bool PerformInit =
+        VD->getAnyInitializer() &&
+        !VD->getAnyInitializer()->isConstantInitializer(getContext(),
+                                                        /*ForRef=*/false);
+    if (auto InitFunction =
+            getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition(
+                VD, GetAddrOfGlobalVar(VD), RefExpr->getLocStart(),
+                PerformInit))
+      CXXGlobalInits.push_back(InitFunction);
+  }
+}
+

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.h Mon Nov 10 22:05:39 2014
@@ -1087,6 +1087,11 @@ public:
   void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV);
 
   void addReplacement(StringRef Name, llvm::Constant *C);
+
+  /// \brief Emit a code for threadprivate directive.
+  /// \param D Threadprivate declaration.
+  void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D);
+
 private:
   llvm::Constant *
   GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,

Modified: cfe/trunk/lib/Frontend/MultiplexConsumer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/MultiplexConsumer.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/MultiplexConsumer.cpp (original)
+++ cfe/trunk/lib/Frontend/MultiplexConsumer.cpp Mon Nov 10 22:05:39 2014
@@ -107,6 +107,7 @@ public:
                                     const ObjCPropertyDecl *OrigProp,
                                     const ObjCCategoryDecl *ClassExt) override;
   void DeclarationMarkedUsed(const Decl *D) override;
+  void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
 
 private:
   std::vector<ASTMutationListener*> Listeners;
@@ -180,6 +181,11 @@ void MultiplexASTMutationListener::Decla
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->DeclarationMarkedUsed(D);
 }
+void MultiplexASTMutationListener::DeclarationMarkedOpenMPThreadPrivate(
+    const Decl *D) {
+  for (size_t i = 0, e = Listeners.size(); i != e; ++i)
+    Listeners[i]->DeclarationMarkedOpenMPThreadPrivate(D);
+}
 
 }  // end namespace clang
 

Modified: cfe/trunk/lib/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/Parser.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Parse/Parser.cpp (original)
+++ cfe/trunk/lib/Parse/Parser.cpp Mon Nov 10 22:05:39 2014
@@ -634,8 +634,7 @@ Parser::ParseExternalDeclaration(ParsedA
     HandlePragmaOpenCLExtension();
     return DeclGroupPtrTy();
   case tok::annot_pragma_openmp:
-    ParseOpenMPDeclarativeDirective();
-    return DeclGroupPtrTy();
+    return ParseOpenMPDeclarativeDirective();
   case tok::annot_pragma_ms_pointers_to_members:
     HandlePragmaMSPointersToMembers();
     return DeclGroupPtrTy();

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Nov 10 22:05:39 2014
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclOpenMP.h"
@@ -847,6 +848,10 @@ Sema::CheckOMPThreadPrivateDecl(SourceLo
 
     Vars.push_back(RefExpr);
     DSAStack->addDSA(VD, DE, OMPC_threadprivate);
+    VD->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(
+        Context, SourceRange(Loc, Loc)));
+    if (auto *ML = Context.getASTMutationListener())
+      ML->DeclarationMarkedOpenMPThreadPrivate(VD);
   }
   OMPThreadPrivateDecl *D = nullptr;
   if (!Vars.empty()) {

Modified: cfe/trunk/lib/Serialization/ASTCommon.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTCommon.h?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTCommon.h (original)
+++ cfe/trunk/lib/Serialization/ASTCommon.h Mon Nov 10 22:05:39 2014
@@ -32,7 +32,8 @@ enum DeclUpdateKind {
   UPD_CXX_DEDUCED_RETURN_TYPE,
   UPD_DECL_MARKED_USED,
   UPD_MANGLING_NUMBER,
-  UPD_STATIC_LOCAL_NUMBER
+  UPD_STATIC_LOCAL_NUMBER,
+  UPD_DECL_MARKED_OPENMP_THREADPRIVATE
 };
 
 TypeIdx TypeIdxFromBuiltin(const BuiltinType *BT);

Modified: cfe/trunk/lib/Serialization/ASTReaderDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderDecl.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReaderDecl.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReaderDecl.cpp Mon Nov 10 22:05:39 2014
@@ -2274,7 +2274,8 @@ static bool isConsumerInterestedIn(Decl
   if (isa<FileScopeAsmDecl>(D) || 
       isa<ObjCProtocolDecl>(D) || 
       isa<ObjCImplDecl>(D) ||
-      isa<ImportDecl>(D))
+      isa<ImportDecl>(D) ||
+      isa<OMPThreadPrivateDecl>(D))
     return true;
   if (VarDecl *Var = dyn_cast<VarDecl>(D))
     return Var->isFileVarDecl() &&
@@ -3686,6 +3687,10 @@ void ASTDeclReader::UpdateDecl(Decl *D,
     case UPD_STATIC_LOCAL_NUMBER:
       Reader.Context.setStaticLocalNumber(cast<VarDecl>(D), Record[Idx++]);
       break;
+    case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
+      D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(
+          Reader.Context, ReadSourceRange(Record, Idx)));
+      break;
     }
   }
 }

Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=221663&r1=221662&r2=221663&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Mon Nov 10 22:05:39 2014
@@ -4788,6 +4788,10 @@ void ASTWriter::WriteDeclUpdatesBlocks(R
       case UPD_STATIC_LOCAL_NUMBER:
         Record.push_back(Update.getNumber());
         break;
+      case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
+        AddSourceRange(D->getAttr<OMPThreadPrivateDeclAttr>()->getRange(),
+                       Record);
+        break;
       }
     }
 
@@ -5881,3 +5885,11 @@ void ASTWriter::DeclarationMarkedUsed(co
 
   DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_USED));
 }
+
+void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
+  assert(!WritingAST && "Already writing the AST!");
+  if (!D->isFromASTFile())
+    return;
+
+  DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE));
+}

Added: cfe/trunk/test/OpenMP/threadprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/threadprivate_codegen.cpp?rev=221663&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/threadprivate_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/threadprivate_codegen.cpp Mon Nov 10 22:05:39 2014
@@ -0,0 +1,707 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// CHECK-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] }
+// CHECK-DAG: [[S2:%.+]] = type { [[INT]], double }
+// CHECK-DAG: [[S3:%.+]] = type { [[INT]], float }
+// CHECK-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] }
+// CHECK-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] }
+// CHECK-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double }
+// CHECK-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] }
+// CHECK-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double }
+// CHECK-DEBUG-DAG: [[S3:%.+]] = type { [[INT]], float }
+// CHECK-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] }
+// CHECK-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] }
+// CHECK-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double }
+
+struct S1 {
+  int a;
+  S1()
+      : a(0) {
+  }
+  S1(int a)
+      : a(a) {
+  }
+  S1(const S1 &s) {
+    a = 12 + s.a;
+  }
+  ~S1() {
+    a = 0;
+  }
+};
+
+struct S2 {
+  int a;
+  double b;
+  S2()
+      : a(0) {
+  }
+  S2(int a)
+      : a(a) {
+  }
+  S2(const S2 &s) {
+    a = 12 + s.a;
+  }
+  ~S2() {
+    a = 0;
+  }
+};
+
+struct S3 {
+  int a;
+  float b;
+  S3()
+      : a(0) {
+  }
+  S3(int a)
+      : a(a) {
+  }
+  S3(const S3 &s) {
+    a = 12 + s.a;
+  }
+  ~S3() {
+    a = 0;
+  }
+};
+
+struct S4 {
+  int a, b;
+  S4()
+      : a(0) {
+  }
+  S4(int a)
+      : a(a) {
+  }
+  S4(const S4 &s) {
+    a = 12 + s.a;
+  }
+  ~S4() {
+    a = 0;
+  }
+};
+
+struct S5 {
+  int a, b, c;
+  S5()
+      : a(0) {
+  }
+  S5(int a)
+      : a(a) {
+  }
+  S5(const S5 &s) {
+    a = 12 + s.a;
+  }
+  ~S5() {
+    a = 0;
+  }
+};
+
+// CHECK-DAG:  [[GS1:@.+]] = internal global [[S1]] zeroinitializer
+// CHECK-DAG:  [[GS1]].cache. = common global i8** null
+// CHECK-DAG:  [[DEFAULT_LOC:@.+]] = private unnamed_addr constant [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) }
+// CHECK-DAG:  [[GS2:@.+]] = internal global [[S2]] zeroinitializer
+// CHECK-DAG:  [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer
+// CHECK-DAG:  [[ARR_X]].cache. = common global i8** null
+// CHECK-DAG:  [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer
+// CHECK-DAG:  [[SM]].cache. = common global i8** null
+// CHECK-DAG:  [[STATIC_S:@.+]] = external global [[S3]]
+// CHECK-DAG:  [[STATIC_S]].cache. = common global i8** null
+// CHECK-DAG:  [[GS3:@.+]] = external global [[S5]]
+// CHECK-DAG:  [[GS3]].cache. = common global i8** null
+// CHECK-DAG:  [[ST_INT_ST:@.+]] = linkonce_odr global i32 23
+// CHECK-DAG:  [[ST_INT_ST]].cache. = common global i8** null
+// CHECK-DAG:  [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01
+// CHECK-DAG:  [[ST_FLOAT_ST]].cache. = common global i8** null
+// CHECK-DAG:  [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer
+// CHECK-DAG:  [[ST_S4_ST]].cache. = common global i8** null
+// CHECK-NOT:  .cache. = common global i8** null
+// There is no cache for gs2 - it is not threadprivate. Check that there is only
+// 8 caches created (for Static::s, gs1, gs3, arr_x, main::sm, ST<int>::st,
+// ST<float>::st, ST<S4>::st)
+// CHECK-DEBUG-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer
+// CHECK-DEBUG-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer
+// CHECK-DEBUG-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer
+// CHECK-DEBUG-DAG: [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer
+// CHECK-DEBUG-DAG: [[STATIC_S:@.+]] = external global [[S3]]
+// CHECK-DEBUG-DAG: [[GS3:@.+]] = external global [[S5]]
+// CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23
+// CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01
+// CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer
+// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;162;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;216;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;303;19;;\00"
+// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;328;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;341;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;358;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;375;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;401;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;422;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;437;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;454;27;;\00"
+// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;471;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;550;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;567;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;593;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;614;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;629;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;646;27;;\00"
+// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;663;10;;\00"
+// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;275;9;;\00"
+
+struct Static {
+  static S3 s;
+#pragma omp threadprivate(s)
+};
+
+static S1 gs1(5);
+#pragma omp threadprivate(gs1)
+// CHECK:      define {{.*}} [[S1_CTOR:@.*]]([[S1]]* {{.*}},
+// CHECK:      define {{.*}} [[S1_DTOR:@.*]]([[S1]]* {{.*}})
+// CHECK:      define internal {{.*}}i8* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]*
+// CHECK-NEXT: call {{.*}} [[S1_CTOR]]([[S1]]* [[RES]], {{.*}} 5)
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      ret i8* [[ARG]]
+// CHECK-NEXT: }
+// CHECK:      define internal {{.*}}void [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]*
+// CHECK-NEXT: call {{.*}} [[S1_DTOR]]([[S1]]* [[RES]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK:      define internal {{.*}}void [[GS1_INIT:@\.__omp_threadprivate_init_\..*]]()
+// CHECK:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG:      store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG:      @__kmpc_global_thread_num
+// CHECK-DEBUG:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]])
+// CHECK-DEBUG:      define internal {{.*}}i8* [[GS1_CTOR]](i8*)
+// CHECK-DEBUG:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK-DEBUG:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-DEBUG:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]*
+// CHECK-DEBUG-NEXT: call {{.*}} [[S1_CTOR:@.+]]([[S1]]* [[RES]], {{.*}} 5)
+// CHECK-DEBUG:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-DEBUG:      ret i8* [[ARG]]
+// CHECK-DEBUG-NEXT: }
+// CHECK-DEBUG:      define {{.*}} [[S1_CTOR]]([[S1]]* {{.*}},
+// CHECK-DEBUG:      define internal {{.*}}void [[GS1_DTOR]](i8*)
+// CHECK-DEBUG:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK-DEBUG:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-DEBUG:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]*
+// CHECK-DEBUG-NEXT: call {{.*}} [[S1_DTOR:@.+]]([[S1]]* [[RES]])
+// CHECK-DEBUG-NEXT: ret void
+// CHECK-DEBUG-NEXT: }
+// CHECK-DEBUG:      define {{.*}} [[S1_DTOR]]([[S1]]* {{.*}})
+static S2 gs2(27);
+// CHECK:      define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}},
+// CHECK:      define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}})
+// No another call for S2 constructor because it is not threadprivate
+// CHECK-NOT:  call {{.*}} [[S2_CTOR]]([[S2]]*
+// CHECK-DEBUG:      define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}},
+// CHECK-DEBUG:      define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}})
+// No another call for S2 constructor because it is not threadprivate
+// CHECK-DEBUG-NOT:  call {{.*}} [[S2_CTOR]]([[S2]]*
+S1 arr_x[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
+#pragma omp threadprivate(arr_x)
+// CHECK:      define internal {{.*}}i8* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [2 x [3 x [[S1]]]]*
+// CHECK:      [[ARR1:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[RES]], i{{.*}} 0, i{{.*}} 0
+// CHECK:      [[ARR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 0, i{{.*}} 0
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR]], [[INT]] {{.*}}1)
+// CHECK:      [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR]], i{{.*}} 1
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] {{.*}}2)
+// CHECK:      [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] {{.*}}3)
+// CHECK:      [[ARR_ELEMENT3:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 1
+// CHECK:      [[ARR_:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_ELEMENT3]], i{{.*}} 0, i{{.*}} 0
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_]], [[INT]] {{.*}}4)
+// CHECK:      [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_]], i{{.*}} 1
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] {{.*}}5)
+// CHECK:      [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1
+// CHECK:      invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] {{.*}}6)
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      ret i8* [[ARG]]
+// CHECK:      }
+// CHECK:      define internal {{.*}}void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[ARR_BEGIN:%.*]] = bitcast i8* [[ARG]] to [[S1]]*
+// CHECK-NEXT: [[ARR_CUR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_BEGIN]], i{{.*}} 6
+// CHECK-NEXT: br label %[[ARR_LOOP:.*]]
+// CHECK:      {{.*}}[[ARR_LOOP]]{{.*}}
+// CHECK-NEXT: [[ARR_ELEMENTPAST:%.*]] = phi [[S1]]* [ [[ARR_CUR]], {{.*}} ], [ [[ARR_ELEMENT:%.*]], {{.*}} ]
+// CHECK-NEXT: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENTPAST]], i{{.*}} -1
+// CHECK-NEXT: invoke {{.*}} [[S1_DTOR]]([[S1]]* [[ARR_ELEMENT]])
+// CHECK:      [[ARR_DONE:%.*]] = icmp eq [[S1]]* [[ARR_ELEMENT]], [[ARR_BEGIN]]
+// CHECK-NEXT: br i1 [[ARR_DONE]], label %[[ARR_EXIT:.*]], label %[[ARR_LOOP]]
+// CHECK:      {{.*}}[[ARR_EXIT]]{{.*}}
+// CHECK-NEXT: ret void
+// CHECK:      }
+// CHECK:      define internal {{.*}}void [[ARR_X_INIT:@\.__omp_threadprivate_init_\..*]]()
+// CHECK:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG:      store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG:      @__kmpc_global_thread_num
+// CHECK-DEBUG:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]])
+// CHECK-DEBUG:      define internal {{.*}}i8* [[ARR_X_CTOR]](i8*)
+// CHECK-DEBUG:      }
+// CHECK-DEBUG:      define internal {{.*}}void [[ARR_X_DTOR]](i8*)
+// CHECK-DEBUG:      }
+extern S5 gs3;
+#pragma omp threadprivate(gs3)
+// No call for S5 constructor because gs3 has just declaration, not a definition.
+// CHECK-NOT:  call {{.*}}([[S5]]*
+// CHECK-DEBUG-NOT:  call {{.*}}([[S5]]*
+
+template <class T>
+struct ST {
+  static T st;
+#pragma omp threadprivate(st)
+};
+
+template <class T>
+T ST<T>::st(23);
+
+// CHECK-LABEL:  @main()
+// CHECK-DEBUG-LABEL: @main()
+int main() {
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+  int Res;
+  struct Smain {
+    int a;
+    double b, c;
+    Smain()
+        : a(0) {
+    }
+    Smain(int a)
+        : a(a) {
+    }
+    Smain(const Smain &s) {
+      a = 12 + s.a;
+    }
+    ~Smain() {
+      a = 0;
+    }
+  };
+
+  static Smain sm(gs1.a);
+// CHECK:      [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]])
+// CHECK:      call {{.*}}i{{.*}} @__cxa_guard_acquire
+// CHECK:      call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]])
+// CHECK:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]])
+// CHECK:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.)
+// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+// CHECK-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]])
+// CHECK:      call {{.*}}void @__cxa_guard_release
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]])
+// CHECK-DEBUG:      call {{.*}}i{{.*}} @__cxa_guard_acquire
+// CHECK-DEBUG:      call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]])
+// CHECK-DEBUG:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]])
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+// CHECK-DEBUG-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]])
+// CHECK-DEBUG:      call {{.*}}void @__cxa_guard_release
+#pragma omp threadprivate(sm)
+  // CHECK:      [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.)
+  // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]*
+  // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]]
+  // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC5]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]*
+  // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]]
+  Res = Static::s.a;
+  // CHECK:      [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[SM]].cache.)
+  // CHECK-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]*
+  // CHECK-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC6]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]*
+  // CHECK-DEBUG-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += sm.a;
+  // CHECK:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.)
+  // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+  // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC7]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+  // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs1.a;
+  // CHECK:      [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0)
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0)
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs2.a;
+  // CHECK:      [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.)
+  // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]*
+  // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC8]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]*
+  // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs3.a;
+  // CHECK:      [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.)
+  // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]*
+  // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC9]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT:      [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]*
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += arr_x[1][1].a;
+  // CHECK:      [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.)
+  // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]*
+  // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC10]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]*
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += ST<int>::st;
+  // CHECK:      [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.)
+  // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float*
+  // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]]
+  // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC11]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float*
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]]
+  // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += static_cast<int>(ST<float>::st);
+  // CHECK:      [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.)
+  // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]*
+  // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC12]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]*
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += ST<S4>::st.a;
+  // CHECK:      [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: ret [[INT]] [[RES]]
+  // CHECK-DEBUG:      [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]]
+  return Res;
+}
+// CHECK: }
+
+// CHECK:      define internal {{.*}}i8* [[SM_CTOR]](i8*)
+// CHECK:      [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]])
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]*
+// CHECK:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.)
+// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+// CHECK-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]])
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-NEXT: ret i8* [[ARG]]
+// CHECK-NEXT: }
+// CHECK:      define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}},
+// CHECK:      define internal {{.*}}void [[SM_DTOR]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]*
+// CHECK-NEXT: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* [[RES]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK:      define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}})
+// CHECK-DEBUG:      define internal {{.*}}i8* [[SM_CTOR]](i8*)
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]])
+// CHECK-DEBUG:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK-DEBUG:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-DEBUG:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]*
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+// CHECK-DEBUG-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]])
+// CHECK-DEBUG:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-DEBUG-NEXT: ret i8* [[ARG]]
+// CHECK-DEBUG-NEXT: }
+// CHECK-DEBUG:      define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}},
+// CHECK-DEBUG:      define internal {{.*}} [[SM_DTOR:@.+]](i8*)
+// CHECK-DEBUG:      call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]*
+// CHECK-DEBUG:      }
+// CHECK-DEBUG:      define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}})
+
+#endif
+
+#ifdef BODY
+// CHECK-LABEL:  @{{.*}}foobar{{.*}}()
+// CHECK-DEBUG-LABEL: @{{.*}}foobar{{.*}}()
+int foobar() {
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+  int Res;
+  // CHECK:      [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]])
+  // CHECK:      [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.)
+  // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]*
+  // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]]
+  // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]])
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]*
+  // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]]
+  Res = Static::s.a;
+  // CHECK:      [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.)
+  // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+  // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC14]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]*
+  // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs1.a;
+  // CHECK:      [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0)
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0)
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs2.a;
+  // CHECK:      [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.)
+  // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]*
+  // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC15]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]*
+  // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += gs3.a;
+  // CHECK:      [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.)
+  // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]*
+  // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC16]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT:      [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]*
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += arr_x[1][1].a;
+  // CHECK:      [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.)
+  // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]*
+  // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC17]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]*
+  // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += ST<int>::st;
+  // CHECK:      [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.)
+  // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float*
+  // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]]
+  // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC18]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float*
+  // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]]
+  // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += static_cast<int>(ST<float>::st);
+  // CHECK:      [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.)
+  // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]*
+  // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]]
+  // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]]
+  // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  // CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+  // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC19]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8***
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]*
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0
+  // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]]
+  // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]]
+  // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
+  Res += ST<S4>::st.a;
+  // CHECK:      [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-NEXT: ret [[INT]] [[RES]]
+  // CHECK-DEBUG:      [[RES:%.*]] = load [[INT]]* [[RES_ADDR]]
+  // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]]
+  return Res;
+}
+#endif
+
+// CHECK:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]])
+// CHECK:      define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S4]]*
+// CHECK-NEXT: call {{.*}} [[S4_CTOR:@.+]]([[S4]]* [[RES]], {{.*}} 23)
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK-NEXT: ret i8* [[ARG]]
+// CHECK-NEXT: }
+// CHECK:      define {{.*}} [[S4_CTOR]]([[S4]]* {{.*}},
+// CHECK:      define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*)
+// CHECK:      store i8* %0, i8** [[ARG_ADDR:%.*]],
+// CHECK:      [[ARG:%.+]] = load i8** [[ARG_ADDR]]
+// CHECK:      [[RES:%.*]] = bitcast i8* [[ARG]] to [[S4]]*
+// CHECK-NEXT: call {{.*}} [[S4_DTOR:@.+]]([[S4]]* [[RES]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK:      define {{.*}} [[S4_DTOR]]([[S4]]* {{.*}})
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]]
+// CHECK-DEBUG:      [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4
+// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]]
+// CHECK-DEBUG:      @__kmpc_global_thread_num
+// CHECK-DEBUG:      call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]])
+// CHECK-DEBUG:      define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*)
+// CHECK-DEBUG:      }
+// CHECK-DEBUG:      define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}},
+// CHECK-DEBUG:      define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*)
+// CHECK-DEBUG:      }
+// CHECK-DEBUG:      define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}})
+
+// CHECK:      define internal {{.*}}void {{@.*}}()
+// CHECK-DAG:  call {{.*}}void [[GS1_INIT]]()
+// CHECK-DAG:  call {{.*}}void [[ARR_X_INIT]]()
+// CHECK:      ret void
+// CHECK-DEBUG:      define internal {{.*}}void {{@.*}}()
+// CHECK-DEBUG:      ret void

Propchange: cfe/trunk/test/OpenMP/threadprivate_codegen.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/trunk/test/OpenMP/threadprivate_codegen.cpp
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Rev URL

Propchange: cfe/trunk/test/OpenMP/threadprivate_codegen.cpp
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp?rev=221663&view=auto
==============================================================================
--- cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp (added)
+++ cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp Mon Nov 10 22:05:39 2014
@@ -0,0 +1,26 @@
+// no PCH
+// RUN: %clang_cc1 -fopenmp=libiomp5 -emit-llvm -include %s -include %s %s -o - | FileCheck %s
+// with PCH
+// RUN: %clang_cc1 -fopenmp=libiomp5 -emit-llvm -chain-include %s -chain-include %s %s -o - | FileCheck %s
+#if !defined(PASS1)
+#define PASS1
+
+extern "C" int* malloc (int size);
+int *a = malloc(20);
+
+#elif !defined(PASS2)
+#define PASS2
+
+#pragma omp threadprivate(a)
+
+#else
+
+// CHECK: call {{.*}} @__kmpc_threadprivate_register(
+
+// CHECK-LABEL: foo
+int foo() {
+  return *a;
+  // CHECK: call {{.*}} @__kmpc_global_thread_num(
+  // CHECK: call {{.*}} @__kmpc_threadprivate_cached(
+}
+#endif

Propchange: cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Rev URL

Propchange: cfe/trunk/test/PCH/chain-openmp-threadprivate.cpp
------------------------------------------------------------------------------
    svn:mime-type = text/plain





More information about the cfe-commits mailing list