r254521 - [OpenMP] Update target directive codegen to use 4.5 implicit data mappings.

Samuel Antao via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 2 09:44:43 PST 2015


Author: sfantao
Date: Wed Dec  2 11:44:43 2015
New Revision: 254521

URL: http://llvm.org/viewvc/llvm-project?rev=254521&view=rev
Log:
[OpenMP] Update target directive codegen to use 4.5 implicit data mappings.

Summary:
This patch implements the 4.5 specification for the implicit data maps. OpenMP 4.5 specification changes the default way data is captured into a target region. All the non-aggregate kinds are passed by value by default. This required activating the capturing by value during SEMA for the target region. All the non-aggregate values that can be encoded in the size of a pointer are properly casted and forwarded to the runtime library. On top of fixing the previous weird behavior for mapping pointers in nested data regions (an explicit map was always required), this also improves performance as the number of allocations/transactions to the device per non-aggregate map are reduced from two to only one - instead of passing a reference and the value, only the value passed.

Explicit maps will be added later on once firstprivate, private, and map clauses' SEMA and parsing are available.

Reviewers: hfinkel, rjmccall, ABataev

Subscribers: cfe-commits, carlo.bertolli

Differential Revision: http://reviews.llvm.org/D14940

Added:
    cfe/trunk/test/OpenMP/target_map_codegen.cpp
Modified:
    cfe/trunk/include/clang/AST/Stmt.h
    cfe/trunk/include/clang/Sema/Sema.h
    cfe/trunk/lib/AST/Stmt.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/Sema/SemaExpr.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp
    cfe/trunk/lib/Sema/SemaStmt.cpp
    cfe/trunk/test/OpenMP/target_codegen.cpp
    cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp

Modified: cfe/trunk/include/clang/AST/Stmt.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Stmt.h?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Stmt.h (original)
+++ cfe/trunk/include/clang/AST/Stmt.h Wed Dec  2 11:44:43 2015
@@ -1986,6 +1986,7 @@ public:
   enum VariableCaptureKind {
     VCK_This,
     VCK_ByRef,
+    VCK_ByCopy,
     VCK_VLAType,
   };
 
@@ -2005,21 +2006,7 @@ public:
     /// \param Var The variable being captured, or null if capturing this.
     ///
     Capture(SourceLocation Loc, VariableCaptureKind Kind,
-            VarDecl *Var = nullptr)
-      : VarAndKind(Var, Kind), Loc(Loc) {
-      switch (Kind) {
-      case VCK_This:
-        assert(!Var && "'this' capture cannot have a variable!");
-        break;
-      case VCK_ByRef:
-        assert(Var && "capturing by reference must have a variable!");
-        break;
-      case VCK_VLAType:
-        assert(!Var &&
-               "Variable-length array type capture cannot have a variable!");
-        break;
-      }
-    }
+            VarDecl *Var = nullptr);
 
     /// \brief Determine the kind of capture.
     VariableCaptureKind getCaptureKind() const { return VarAndKind.getInt(); }
@@ -2031,9 +2018,14 @@ public:
     /// \brief Determine whether this capture handles the C++ 'this' pointer.
     bool capturesThis() const { return getCaptureKind() == VCK_This; }
 
-    /// \brief Determine whether this capture handles a variable.
+    /// \brief Determine whether this capture handles a variable (by reference).
     bool capturesVariable() const { return getCaptureKind() == VCK_ByRef; }
 
+    /// \brief Determine whether this capture handles a variable by copy.
+    bool capturesVariableByCopy() const {
+      return getCaptureKind() == VCK_ByCopy;
+    }
+
     /// \brief Determine whether this capture handles a variable-length array
     /// type.
     bool capturesVariableArrayType() const {
@@ -2044,7 +2036,7 @@ public:
     ///
     /// This operation is only valid if this capture captures a variable.
     VarDecl *getCapturedVar() const {
-      assert(capturesVariable() &&
+      assert((capturesVariable() || capturesVariableByCopy()) &&
              "No variable available for 'this' or VAT capture");
       return VarAndKind.getPointer();
     }

Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Wed Dec  2 11:44:43 2015
@@ -7751,6 +7751,12 @@ private:
   ExprResult VerifyPositiveIntegerConstantInClause(Expr *Op,
                                                    OpenMPClauseKind CKind);
 public:
+  /// \brief Return true if the provided declaration \a VD should be captured by
+  /// reference in the provided scope \a RSI. This will take into account the
+  /// semantics of the directive and associated clauses.
+  bool IsOpenMPCapturedByRef(VarDecl *VD,
+                             const sema::CapturedRegionScopeInfo *RSI);
+
   /// \brief Check if the specified variable is used in one of the private
   /// clauses (private, firstprivate, lastprivate, reduction etc.) in OpenMP
   /// constructs.

Modified: cfe/trunk/lib/AST/Stmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Stmt.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/AST/Stmt.cpp (original)
+++ cfe/trunk/lib/AST/Stmt.cpp Wed Dec  2 11:44:43 2015
@@ -945,6 +945,33 @@ SEHFinallyStmt* SEHFinallyStmt::Create(c
   return new(C)SEHFinallyStmt(Loc,Block);
 }
 
+CapturedStmt::Capture::Capture(SourceLocation Loc, VariableCaptureKind Kind,
+                               VarDecl *Var)
+    : VarAndKind(Var, Kind), Loc(Loc) {
+  switch (Kind) {
+  case VCK_This:
+    assert(!Var && "'this' capture cannot have a variable!");
+    break;
+  case VCK_ByRef:
+    assert(Var && "capturing by reference must have a variable!");
+    break;
+  case VCK_ByCopy:
+    assert(Var && "capturing by copy must have a variable!");
+    assert(
+        (Var->getType()->isScalarType() || (Var->getType()->isReferenceType() &&
+                                            Var->getType()
+                                                ->castAs<ReferenceType>()
+                                                ->getPointeeType()
+                                                ->isScalarType())) &&
+        "captures by copy are expected to have a scalar type!");
+    break;
+  case VCK_VLAType:
+    assert(!Var &&
+           "Variable-length array type capture cannot have a variable!");
+    break;
+  }
+}
+
 CapturedStmt::Capture *CapturedStmt::getStoredCaptures() const {
   unsigned Size = sizeof(CapturedStmt) + sizeof(Stmt *) * (NumCaptures + 1);
 

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Wed Dec  2 11:44:43 2015
@@ -3180,7 +3180,7 @@ CGOpenMPRuntime::emitTargetOutlinedFunct
   CodeGenFunction CGF(CGM, true);
   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-  return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
+  return CGF.GenerateOpenMPCapturedStmtFunction(CS);
 }
 
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
@@ -3195,6 +3195,10 @@ void CGOpenMPRuntime::emitTargetCall(Cod
     OMP_MAP_TO = 0x01,
     /// \brief Allocate memory on the device and move data from device to host.
     OMP_MAP_FROM = 0x02,
+    /// \brief The element passed to the device is a pointer.
+    OMP_MAP_PTR = 0x20,
+    /// \brief Pass the element to the device by value.
+    OMP_MAP_BYCOPY = 0x80,
   };
 
   enum OpenMPOffloadingReservedDeviceIDs {
@@ -3203,6 +3207,8 @@ void CGOpenMPRuntime::emitTargetCall(Cod
     OMP_DEVICEID_UNDEF = -1,
   };
 
+  auto &Ctx = CGF.getContext();
+
   // Fill up the arrays with the all the captured variables.
   SmallVector<llvm::Value *, 16> BasePointers;
   SmallVector<llvm::Value *, 16> Pointers;
@@ -3225,27 +3231,61 @@ void CGOpenMPRuntime::emitTargetCall(Cod
     llvm::Value *Size;
     unsigned MapType;
 
+    // VLA sizes are passed to the outlined region by copy.
     if (CI->capturesVariableArrayType()) {
       BasePointer = Pointer = *CV;
       Size = getTypeSize(CGF, RI->getType());
+      // Copy to the device as an argument. No need to retrieve it.
+      MapType = OMP_MAP_BYCOPY;
       hasVLACaptures = true;
-      // VLA sizes don't need to be copied back from the device.
-      MapType = OMP_MAP_TO;
     } else if (CI->capturesThis()) {
       BasePointer = Pointer = *CV;
       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
       Size = getTypeSize(CGF, PtrTy->getPointeeType());
       // Default map type.
       MapType = OMP_MAP_TO | OMP_MAP_FROM;
+    } else if (CI->capturesVariableByCopy()) {
+      MapType = OMP_MAP_BYCOPY;
+      if (!RI->getType()->isAnyPointerType()) {
+        // If the field is not a pointer, we need to save the actual value and
+        // load it as a void pointer.
+        auto DstAddr = CGF.CreateMemTemp(
+            Ctx.getUIntPtrType(),
+            Twine(CI->getCapturedVar()->getName()) + ".casted");
+        LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
+
+        auto *SrcAddrVal = CGF.EmitScalarConversion(
+            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
+            Ctx.getPointerType(RI->getType()), SourceLocation());
+        LValue SrcLV =
+            CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
+
+        // Store the value using the source type pointer.
+        CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
+
+        // Load the value using the destination type pointer.
+        BasePointer = Pointer =
+            CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+      } else {
+        MapType |= OMP_MAP_PTR;
+        BasePointer = Pointer = *CV;
+      }
+      Size = getTypeSize(CGF, RI->getType());
     } else {
+      assert(CI->capturesVariable() && "Expected captured reference.");
       BasePointer = Pointer = *CV;
 
       const ReferenceType *PtrTy =
           cast<ReferenceType>(RI->getType().getTypePtr());
       QualType ElementType = PtrTy->getPointeeType();
       Size = getTypeSize(CGF, ElementType);
-      // Default map type.
-      MapType = OMP_MAP_TO | OMP_MAP_FROM;
+      // The default map type for a scalar/complex type is 'to' because by
+      // default the value doesn't have to be retrieved. For an aggregate type,
+      // the default is 'tofrom'.
+      MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
+                                               : OMP_MAP_TO;
+      if (ElementType->isAnyPointerType())
+        MapType |= OMP_MAP_PTR;
     }
 
     BasePointers.push_back(BasePointer);
@@ -3256,7 +3296,7 @@ void CGOpenMPRuntime::emitTargetCall(Cod
 
   // Keep track on whether the host function has to be executed.
   auto OffloadErrorQType =
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
+      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
   auto OffloadError = CGF.MakeAddrLValue(
       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
       OffloadErrorQType);
@@ -3264,7 +3304,7 @@ void CGOpenMPRuntime::emitTargetCall(Cod
                         OffloadError);
 
   // Fill up the pointer arrays and transfer execution to the device.
-  auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes,
+  auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
                     hasVLACaptures, Device, OffloadError,
                     OffloadErrorQType](CodeGenFunction &CGF) {
     unsigned PointerNumVal = BasePointers.size();
@@ -3276,8 +3316,8 @@ void CGOpenMPRuntime::emitTargetCall(Cod
 
     if (PointerNumVal) {
       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
-      QualType PointerArrayType = CGF.getContext().getConstantArrayType(
-          CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
+      QualType PointerArrayType = Ctx.getConstantArrayType(
+          Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
           /*IndexTypeQuals=*/0);
 
       BasePointersArray =
@@ -3289,8 +3329,8 @@ void CGOpenMPRuntime::emitTargetCall(Cod
       // sizes, otherwise we need to fill up the arrays as we do for the
       // pointers.
       if (hasVLACaptures) {
-        QualType SizeArrayType = CGF.getContext().getConstantArrayType(
-            CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal,
+        QualType SizeArrayType = Ctx.getConstantArrayType(
+            Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
             /*IndexTypeQuals=*/0);
         SizesArray =
             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
@@ -3323,29 +3363,41 @@ void CGOpenMPRuntime::emitTargetCall(Cod
       MapTypesArray = MapTypesArrayGbl;
 
       for (unsigned i = 0; i < PointerNumVal; ++i) {
+
+        llvm::Value *BPVal = BasePointers[i];
+        if (BPVal->getType()->isPointerTy())
+          BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
+        else {
+          assert(BPVal->getType()->isIntegerTy() &&
+                 "If not a pointer, the value type must be an integer.");
+          BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
+        }
         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
             BasePointersArray, 0, i);
-        Address BPAddr(BP, CGM.getContext().getTypeAlignInChars(
-                               CGM.getContext().VoidPtrTy));
-        CGF.Builder.CreateStore(
-            CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr);
+        Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
+        CGF.Builder.CreateStore(BPVal, BPAddr);
 
+        llvm::Value *PVal = Pointers[i];
+        if (PVal->getType()->isPointerTy())
+          PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
+        else {
+          assert(PVal->getType()->isIntegerTy() &&
+                 "If not a pointer, the value type must be an integer.");
+          PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
+        }
         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
             0, i);
-        Address PAddr(P, CGM.getContext().getTypeAlignInChars(
-                             CGM.getContext().VoidPtrTy));
-        CGF.Builder.CreateStore(
-            CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr);
+        Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
+        CGF.Builder.CreateStore(PVal, PAddr);
 
         if (hasVLACaptures) {
           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
               /*Idx0=*/0,
               /*Idx1=*/i);
-          Address SAddr(S, CGM.getContext().getTypeAlignInChars(
-                               CGM.getContext().getSizeType()));
+          Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
                                   SAddr);

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Wed Dec  2 11:44:43 2015
@@ -21,8 +21,7 @@ using namespace clang;
 using namespace CodeGen;
 
 void CodeGenFunction::GenerateOpenMPCapturedVars(
-    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars,
-    bool UseOnlyReferences) {
+    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
   const RecordDecl *RD = S.getCapturedRecordDecl();
   auto CurField = RD->field_begin();
   auto CurCap = S.captures().begin();
@@ -32,26 +31,46 @@ void CodeGenFunction::GenerateOpenMPCapt
     if (CurField->hasCapturedVLAType()) {
       auto VAT = CurField->getCapturedVLAType();
       auto *Val = VLASizeMap[VAT->getSizeExpr()];
-      // If we need to use only references, create a temporary location for the
-      // size of the VAT.
-      if (UseOnlyReferences) {
-        LValue LV =
-            MakeAddrLValue(CreateMemTemp(CurField->getType(), "__vla_size_ref"),
-                           CurField->getType());
-        EmitStoreThroughLValue(RValue::get(Val), LV);
-        Val = LV.getAddress().getPointer();
-      }
       CapturedVars.push_back(Val);
     } else if (CurCap->capturesThis())
       CapturedVars.push_back(CXXThisValue);
-    else
+    else if (CurCap->capturesVariableByCopy())
+      CapturedVars.push_back(
+          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
+    else {
+      assert(CurCap->capturesVariable() && "Expected capture by reference.");
       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
+    }
+  }
+}
+
+static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
+                                    StringRef Name, LValue AddrLV,
+                                    bool isReferenceType = false) {
+  ASTContext &Ctx = CGF.getContext();
+
+  auto *CastedPtr = CGF.EmitScalarConversion(
+      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
+      Ctx.getPointerType(DstType), SourceLocation());
+  auto TmpAddr =
+      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
+          .getAddress();
+
+  // If we are dealing with references we need to return the address of the
+  // reference instead of the reference of the value.
+  if (isReferenceType) {
+    QualType RefType = Ctx.getLValueReferenceType(DstType);
+    auto *RefVal = TmpAddr.getPointer();
+    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
+    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
+    CGF.EmitScalarInit(RefVal, TmpLVal);
   }
+
+  return TmpAddr;
 }
 
 llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
-                                                    bool UseOnlyReferences) {
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
   assert(
       CapturedStmtInfo &&
       "CapturedStmtInfo should be set when generating the captured function");
@@ -69,7 +88,17 @@ CodeGenFunction::GenerateOpenMPCapturedS
     QualType ArgType = FD->getType();
     IdentifierInfo *II = nullptr;
     VarDecl *CapVar = nullptr;
-    if (I->capturesVariable()) {
+
+    // If this is a capture by copy and the type is not a pointer, the outlined
+    // function argument type should be uintptr and the value properly casted to
+    // uintptr. This is necessary given that the runtime library is only able to
+    // deal with pointers. We can pass in the same way the VLA type sizes to the
+    // outlined function.
+    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
+        I->capturesVariableArrayType())
+      ArgType = Ctx.getUIntPtrType();
+
+    if (I->capturesVariable() || I->capturesVariableByCopy()) {
       CapVar = I->getCapturedVar();
       II = CapVar->getIdentifier();
     } else if (I->capturesThis())
@@ -77,9 +106,6 @@ CodeGenFunction::GenerateOpenMPCapturedS
     else {
       assert(I->capturesVariableArrayType());
       II = &getContext().Idents.get("vla");
-      if (UseOnlyReferences)
-        ArgType = getContext().getLValueReferenceType(
-            ArgType, /*SpelledAsLValue=*/false);
     }
     if (ArgType->isVariablyModifiedType())
       ArgType = getContext().getVariableArrayDecayedType(ArgType);
@@ -111,15 +137,24 @@ CodeGenFunction::GenerateOpenMPCapturedS
   unsigned Cnt = CD->getContextParamPosition();
   I = S.captures().begin();
   for (auto *FD : RD->fields()) {
+    // If we are capturing a pointer by copy we don't need to do anything, just
+    // use the value that we get from the arguments.
+    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
+      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
+      ++Cnt, ++I;
+      continue;
+    }
+
     LValue ArgLVal =
         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                        AlignmentSource::Decl);
     if (FD->hasCapturedVLAType()) {
-      if (UseOnlyReferences)
-        ArgLVal = EmitLoadOfReferenceLValue(
-            ArgLVal.getAddress(), ArgLVal.getType()->castAs<ReferenceType>());
+      LValue CastedArgLVal =
+          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
+                                              Args[Cnt]->getName(), ArgLVal),
+                         FD->getType(), AlignmentSource::Decl);
       auto *ExprArg =
-          EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
+          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
       auto VAT = FD->getCapturedVLAType();
       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
     } else if (I->capturesVariable()) {
@@ -132,6 +167,15 @@ CodeGenFunction::GenerateOpenMPCapturedS
       }
       setAddrOfLocalVar(
           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
+    } else if (I->capturesVariableByCopy()) {
+      assert(!FD->getType()->isAnyPointerType() &&
+             "Not expecting a captured pointer.");
+      auto *Var = I->getCapturedVar();
+      QualType VarTy = Var->getType();
+      setAddrOfLocalVar(I->getCapturedVar(),
+                        castValueFromUintptr(*this, FD->getType(),
+                                             Args[Cnt]->getName(), ArgLVal,
+                                             VarTy->isReferenceType()));
     } else {
       // If 'this' is captured, load it into CXXThisValue.
       assert(I->capturesThis());
@@ -2480,7 +2524,7 @@ void CodeGenFunction::EmitOMPTargetDirec
   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
 
   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
-  GenerateOpenMPCapturedVars(CS, CapturedVars, /*UseOnlyReferences=*/true);
+  GenerateOpenMPCapturedVars(CS, CapturedVars);
 
   // Emit target region as a standalone region.
   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Wed Dec  2 11:44:43 2015
@@ -2200,12 +2200,9 @@ public:
   llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
-  llvm::Function *
-  GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
-                                     bool UseOnlyReferences = false);
+  llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
-                                  SmallVectorImpl<llvm::Value *> &CapturedVars,
-                                  bool UseOnlyReferences = false);
+                                  SmallVectorImpl<llvm::Value *> &CapturedVars);
   /// \brief Perform element by element copying of arrays with type \a
   /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
   /// generated by \a CopyGen.

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Wed Dec  2 11:44:43 2015
@@ -12621,10 +12621,15 @@ static bool isVariableAlreadyCapturedInS
       
     // Compute the type of an expression that refers to this variable.
     DeclRefType = CaptureType.getNonReferenceType();
-      
+
+    // Similarly to mutable captures in lambda, all the OpenMP captures by copy
+    // are mutable in the sense that user can change their value - they are
+    // private instances of the captured declarations.
     const CapturingScopeInfo::Capture &Cap = CSI->getCapture(Var);
     if (Cap.isCopyCapture() &&
-        !(isa<LambdaScopeInfo>(CSI) && cast<LambdaScopeInfo>(CSI)->Mutable))
+        !(isa<LambdaScopeInfo>(CSI) && cast<LambdaScopeInfo>(CSI)->Mutable) &&
+        !(isa<CapturedRegionScopeInfo>(CSI) &&
+          cast<CapturedRegionScopeInfo>(CSI)->CapRegionKind == CR_OpenMP))
       DeclRefType.addConst();
     return true;
   }
@@ -12812,9 +12817,17 @@ static bool captureInCapturedRegion(Capt
   // By default, capture variables by reference.
   bool ByRef = true;
   // Using an LValue reference type is consistent with Lambdas (see below).
-  if (S.getLangOpts().OpenMP && S.IsOpenMPCapturedVar(Var))
-    DeclRefType = DeclRefType.getUnqualifiedType();
-  CaptureType = S.Context.getLValueReferenceType(DeclRefType);
+  if (S.getLangOpts().OpenMP) {
+    ByRef = S.IsOpenMPCapturedByRef(Var, RSI);
+    if (S.IsOpenMPCapturedVar(Var))
+      DeclRefType = DeclRefType.getUnqualifiedType();
+  }
+
+  if (ByRef)
+    CaptureType = S.Context.getLValueReferenceType(DeclRefType);
+  else
+    CaptureType = DeclRefType;
+
   Expr *CopyExpr = nullptr;
   if (BuildAndDiagnose) {
     // The current implementation assumes that all variables are captured

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Wed Dec  2 11:44:43 2015
@@ -222,6 +222,8 @@ public:
       return Stack[Stack.size() - 2].Directive;
     return OMPD_unknown;
   }
+  /// \brief Return the directive associated with the provided scope.
+  OpenMPDirectiveKind getDirectiveForScope(const Scope *S) const;
 
   /// \brief Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
@@ -729,12 +731,107 @@ bool DSAStackTy::hasDirective(NamedDirec
   return false;
 }
 
+OpenMPDirectiveKind DSAStackTy::getDirectiveForScope(const Scope *S) const {
+  for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
+    if (I->CurScope == S)
+      return I->Directive;
+  return OMPD_unknown;
+}
+
 void Sema::InitDataSharingAttributesStack() {
   VarDataSharingAttributesStack = new DSAStackTy(*this);
 }
 
 #define DSAStack static_cast<DSAStackTy *>(VarDataSharingAttributesStack)
 
+bool Sema::IsOpenMPCapturedByRef(VarDecl *VD,
+                                 const CapturedRegionScopeInfo *RSI) {
+  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+
+  auto &Ctx = getASTContext();
+  bool IsByRef = true;
+
+  // Find the directive that is associated with the provided scope.
+  auto DKind = DSAStack->getDirectiveForScope(RSI->TheScope);
+  auto Ty = VD->getType();
+
+  if (isOpenMPTargetDirective(DKind)) {
+    // This table summarizes how a given variable should be passed to the device
+    // given its type and the clauses where it appears. This table is based on
+    // the description in OpenMP 4.5 [2.10.4, target Construct] and
+    // OpenMP 4.5 [2.15.5, Data-mapping Attribute Rules and Clauses].
+    //
+    // =========================================================================
+    // | type |  defaultmap   | pvt | first | is_device_ptr |    map   | res.  |
+    // |      |(tofrom:scalar)|     |  pvt  |               |          |       |
+    // =========================================================================
+    // | scl  |               |     |       |       -       |          | bycopy|
+    // | scl  |               |  -  |   x   |       -       |     -    | bycopy|
+    // | scl  |               |  x  |   -   |       -       |     -    | null  |
+    // | scl  |       x       |     |       |       -       |          | byref |
+    // | scl  |       x       |  -  |   x   |       -       |     -    | bycopy|
+    // | scl  |       x       |  x  |   -   |       -       |     -    | null  |
+    // | scl  |               |  -  |   -   |       -       |     x    | byref |
+    // | scl  |       x       |  -  |   -   |       -       |     x    | byref |
+    //
+    // | agg  |      n.a.     |     |       |       -       |          | byref |
+    // | agg  |      n.a.     |  -  |   x   |       -       |     -    | byref |
+    // | agg  |      n.a.     |  x  |   -   |       -       |     -    | null  |
+    // | agg  |      n.a.     |  -  |   -   |       -       |     x    | byref |
+    // | agg  |      n.a.     |  -  |   -   |       -       |    x[]   | byref |
+    //
+    // | ptr  |      n.a.     |     |       |       -       |          | bycopy|
+    // | ptr  |      n.a.     |  -  |   x   |       -       |     -    | bycopy|
+    // | ptr  |      n.a.     |  x  |   -   |       -       |     -    | null  |
+    // | ptr  |      n.a.     |  -  |   -   |       -       |     x    | byref |
+    // | ptr  |      n.a.     |  -  |   -   |       -       |    x[]   | bycopy|
+    // | ptr  |      n.a.     |  -  |   -   |       x       |          | bycopy|
+    // | ptr  |      n.a.     |  -  |   -   |       x       |     x    | bycopy|
+    // | ptr  |      n.a.     |  -  |   -   |       x       |    x[]   | bycopy|
+    // =========================================================================
+    // Legend:
+    //  scl - scalar
+    //  ptr - pointer
+    //  agg - aggregate
+    //  x - applies
+    //  - - invalid in this combination
+    //  [] - mapped with an array section
+    //  byref - should be mapped by reference
+    //  byval - should be mapped by value
+    //  null - initialize a local variable to null on the device
+    //
+    // Observations:
+    //  - All scalar declarations that show up in a map clause have to be passed
+    //    by reference, because they may have been mapped in the enclosing data
+    //    environment.
+    //  - If the scalar value does not fit the size of uintptr, it has to be
+    //    passed by reference, regardless the result in the table above.
+    //  - For pointers mapped by value that have either an implicit map or an
+    //    array section, the runtime library may pass the NULL value to the
+    //    device instead of the value passed to it by the compiler.
+
+    // FIXME: Right now, only implicit maps are implemented. Properly mapping
+    // values requires having the map, private, and firstprivate clauses SEMA
+    // and parsing in place, which we don't yet.
+
+    if (Ty->isReferenceType())
+      Ty = Ty->castAs<ReferenceType>()->getPointeeType();
+    IsByRef = !Ty->isScalarType();
+  }
+
+  // When passing data by value, we need to make sure it fits the uintptr size
+  // and alignment, because the runtime library only deals with uintptr types.
+  // If it does not fit the uintptr size, we need to pass the data by reference
+  // instead.
+  if (!IsByRef &&
+      (Ctx.getTypeSizeInChars(Ty) >
+           Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) ||
+       Ctx.getDeclAlign(VD) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType())))
+    IsByRef = true;
+
+  return IsByRef;
+}
+
 bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   VD = VD->getCanonicalDecl();
@@ -5015,7 +5112,13 @@ StmtResult Sema::ActOnOpenMPTargetDirect
   if (!AStmt)
     return StmtError();
 
-  assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
+  CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CS->getCapturedDecl()->setNothrow();
 
   // OpenMP [2.16, Nesting of Regions]
   // If specified, a teams construct must be contained within a target

Modified: cfe/trunk/lib/Sema/SemaStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaStmt.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaStmt.cpp (original)
+++ cfe/trunk/lib/Sema/SemaStmt.cpp Wed Dec  2 11:44:43 2015
@@ -3803,11 +3803,10 @@ static void buildCapturedStmtCaptureList
       continue;
     }
 
-    assert(Cap->isReferenceCapture() &&
-           "non-reference capture not yet implemented");
-
     Captures.push_back(CapturedStmt::Capture(Cap->getLocation(),
-                                             CapturedStmt::VCK_ByRef,
+                                             Cap->isReferenceCapture()
+                                                 ? CapturedStmt::VCK_ByRef
+                                                 : CapturedStmt::VCK_ByCopy,
                                              Cap->getVariable()));
     CaptureInits.push_back(Cap->getInitExpr());
   }

Modified: cfe/trunk/test/OpenMP/target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_codegen.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_codegen.cpp Wed Dec  2 11:44:43 2015
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -16,15 +16,15 @@
 // sizes.
 
 // CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2]
-// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 128]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
-// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3]
-// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3]
+// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 128, i32 128]
+// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 128, i32 3, i32 128, i32 3, i32 3, i32 128, i32 128, i32 3, i32 3]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3]
+// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 128, i32 128, i32 3]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
-// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3]
-// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3]
+// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 128, i32 128, i32 128, i32 3]
+// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 128, i32 128, i32 128, i32 3]
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
@@ -66,7 +66,7 @@ int foo(int n) {
   // CHECK:       store i32 -1, i32* [[RHV]], align 4
   // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
-  // CHECK:       call void [[HVT1:@.+]](i32* {{[^,]+}})
+  // CHECK:       call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
   #pragma omp target if(0)
   {
     a += 1;
@@ -79,15 +79,15 @@ int foo(int n) {
   // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
   // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
   // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
-  // CHECK-DAG:   [[BP0]] = bitcast i16* %{{.+}} to i8*
-  // CHECK-DAG:   [[P0]] = bitcast i16* %{{.+}} to i8*
+  // CHECK-DAG:   [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
+  // CHECK-DAG:   [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
 
   // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align 4
   // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
   // CHECK:       [[FAIL]]
-  // CHECK:       call void [[HVT2:@.+]](i16* {{[^,]+}})
+  // CHECK:       call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}})
   // CHECK-NEXT:  br label %[[END]]
   // CHECK:       [[END]]
   #pragma omp target if(1)
@@ -106,15 +106,15 @@ int foo(int n) {
   // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
   // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
   // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
-  // CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
-  // CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
+  // CHECK-DAG:   [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
 
   // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
   // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
   // CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
   // CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
-  // CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
-  // CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+  // CHECK-DAG:   [[BP1]] = inttoptr i[[SZ]] %{{.+}} to i8*
+  // CHECK-DAG:   [[P1]] = inttoptr i[[SZ]] %{{.+}} to i8*
   // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align 4
   // CHECK-NEXT:  br label %[[IFEND:.+]]
 
@@ -137,12 +137,17 @@ int foo(int n) {
   }
 
   // We capture 3 VLA sizes in this target region
-  // CHECK:       store i[[SZ]] [[BNELEMSIZE:%.+]], i[[SZ]]* [[VLA0:%[^,]+]]
-  // CHECK:       store i[[SZ]] 5, i[[SZ]]* [[VLA1:%[^,]+]]
-  // CHECK:       store i[[SZ]] [[CNELEMSIZE1:%.+]], i[[SZ]]* [[VLA2:%[^,]+]]
+  // CHECK-64:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-64:       [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
+  // CHECK-64:       store i32 [[A_VAL]], i32* [[A_ADDR]],
+  // CHECK-64:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
+
+  // CHECK-32:       [[A_VAL:%.+]] = load i32, i32* %{{.+}},
+  // CHECK-32:       store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
+  // CHECK-32:       [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
 
-  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[BNELEMSIZE]], 4
-  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[CNELEMSIZE1]]
+  // CHECK:       [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
+  // CHECK:       [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
   // CHECK:       [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
 
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
@@ -183,26 +188,24 @@ int foo(int n) {
 
   // The names below are not necessarily consistent with the names used for the
   // addresses above as some are repeated.
-  // CHECK-DAG:   [[BP0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8*
-  // CHECK-DAG:   [[P0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8*
+  // CHECK-DAG:   [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
+  // CHECK-DAG:   [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
   // CHECK-DAG:   store i8* [[BP0]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i8* [[P0]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
 
-  // CHECK-DAG:   [[BP1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8*
-  // CHECK-DAG:   [[P1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8*
+  // CHECK-DAG:   [[BP1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
+  // CHECK-DAG:   [[P1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
   // CHECK-DAG:   store i8* [[BP1]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i8* [[P1]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
 
-  // CHECK-DAG:   [[BP2:%[^,]+]] = bitcast i[[SZ]]* [[VLA2]] to i8*
-  // CHECK-DAG:   [[P2:%[^,]+]] = bitcast i[[SZ]]* [[VLA2]] to i8*
-  // CHECK-DAG:   store i8* [[BP2]], i8** {{%[^,]+}}
-  // CHECK-DAG:   store i8* [[P2]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
   // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
 
-  // CHECK-DAG:   [[BP3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
-  // CHECK-DAG:   [[P3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   [[BP3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
+  // CHECK-DAG:   [[P3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
   // CHECK-DAG:   store i8* [[BP3]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i8* [[P3]], i8** {{%[^,]+}}
   // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
@@ -265,67 +268,67 @@ int foo(int n) {
 
 // CHECK:       define internal void [[HVT0]]()
 
-// CHECK:       define internal void [[HVT1]](i32* dereferenceable(4) %{{.+}})
+// CHECK:       define internal void [[HVT1]](i[[SZ]] %{{.+}})
 // Create stack storage and store argument in there.
-// CHECK:       [[A_ADDR:%.+]] = alloca i32*, align
-// CHECK:       store i32* %{{.+}}, i32** [[A_ADDR]], align
-// CHECK:       [[A_ADDR2:%.+]] = load i32*, i32** [[A_ADDR]], align
-// CHECK:       load i32, i32* [[A_ADDR2]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64:    [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
+// CHECK-64:    load i32, i32* [[AA_CADDR]], align
+// CHECK-32:    load i32, i32* [[AA_ADDR]], align
 
-// CHECK:       define internal void [[HVT2]](i16* dereferenceable(2) %{{.+}})
+// CHECK:       define internal void [[HVT2]](i[[SZ]] %{{.+}})
 // Create stack storage and store argument in there.
-// CHECK:       [[AA_ADDR:%.+]] = alloca i16*, align
-// CHECK:       store i16* %{{.+}}, i16** [[AA_ADDR]], align
-// CHECK:       [[AA_ADDR2:%.+]] = load i16*, i16** [[AA_ADDR]], align
-// CHECK:       load i16, i16* [[AA_ADDR2]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK:       [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK:       load i16, i16* [[AA_CADDR]], align
 
 // CHECK:       define internal void [[HVT3]]
 // Create stack storage and store argument in there.
-// CHECK-DAG:   [[A_ADDR:%.+]] = alloca i32*, align
-// CHECK-DAG:   [[AA_ADDR:%.+]] = alloca i16*, align
-// CHECK-DAG:   store i32* %{{.+}}, i32** [[A_ADDR]], align
-// CHECK-DAG:   store i16* %{{.+}}, i16** [[AA_ADDR]], align
-// CHECK-DAG:   [[A_ADDR2:%.+]] = load i32*, i32** [[A_ADDR]], align
-// CHECK-DAG:   [[AA_ADDR2:%.+]] = load i16*, i16** [[AA_ADDR]], align
-// CHECK-DAG:   load i32, i32* [[A_ADDR2]], align
-// CHECK-DAG:   load i16, i16* [[AA_ADDR2]], align
+// CHECK:       [[A_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK:       [[AA_ADDR:%.+]] = alloca i[[SZ]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
+// CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
+// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
+// CHECK-DAG:   [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
+// CHECK-64-DAG:load i32, i32* [[A_CADDR]], align
+// CHECK-32-DAG:load i32, i32* [[A_ADDR]], align
+// CHECK-DAG:   load i16, i16* [[AA_CADDR]], align
 
 // CHECK:       define internal void [[HVT4]]
 // Create local storage for each capture.
-// CHECK-DAG:   [[LOCAL_A:%.+]] = alloca i32*
-// CHECK-DAG:   [[LOCAL_B:%.+]] = alloca [10 x float]*
-// CHECK-DAG:   [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]*
-// CHECK-DAG:   [[LOCAL_BN:%.+]] = alloca float*
-// CHECK-DAG:   [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
-// CHECK-DAG:   [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]*
-// CHECK-DAG:   [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]*
-// CHECK-DAG:   [[LOCAL_CN:%.+]] = alloca double*
-// CHECK-DAG:   [[LOCAL_D:%.+]] = alloca [[TT]]*
-// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]]
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x float]*
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_BN:%.+]] = alloca float*
+// CHECK:       [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_CN:%.+]] = alloca double*
+// CHECK:       [[LOCAL_D:%.+]] = alloca [[TT]]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
 // CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
-// CHECK-DAG:   store i[[SZ]]* [[ARG_VLA1:%.+]], i[[SZ]]** [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
 // CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
 // CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
-// CHECK-DAG:   store i[[SZ]]* [[ARG_VLA2:%.+]], i[[SZ]]** [[LOCAL_VLA2]]
-// CHECK-DAG:   store i[[SZ]]* [[ARG_VLA3:%.+]], i[[SZ]]** [[LOCAL_VLA3]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
 // CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
 // CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
 
-// CHECK-DAG:   [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]],
+// CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
 // CHECK-DAG:   [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
-// CHECK-DAG:   [[REF_VLA1:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA1]],
-// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA1]],
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
 // CHECK-DAG:   [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
 // CHECK-DAG:   [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
-// CHECK-DAG:   [[REF_VLA2:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA2]],
-// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA2]],
-// CHECK-DAG:   [[REF_VLA3:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA3]],
-// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA3]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
+// CHECK-DAG:   [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
 // CHECK-DAG:   [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
 // CHECK-DAG:   [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
 
 // Use captures.
-// CHECK-DAG:   load i32, i32* [[REF_A]]
+// CHECK-64-DAG:   load i32, i32* [[REF_A]]
+// CHECK-32-DAG:   load i32, i32* [[LOCAL_A]]
 // CHECK-DAG:   getelementptr inbounds [10 x float], [10 x float]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
 // CHECK-DAG:   getelementptr inbounds float, float* [[REF_BN]], i[[SZ]] 3
 // CHECK-DAG:   getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] 0, i[[SZ]] 1
@@ -406,10 +409,16 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FS1]]
 //
+// CHECK:          i8* @llvm.stacksave()
+// CHECK-64:       [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
+// CHECK-64:       store i32 %{{.+}}, i32* [[B_ADDR]],
+// CHECK-64:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
+
+// CHECK-32:       store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
+// CHECK-32:       [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
+
 // We capture 2 VLA sizes in this target region
-// CHECK:       store i[[SZ]] 2, i[[SZ]]* [[VLA0:%[^,]+]]
-// CHECK:       store i[[SZ]] [[CELEMSIZE1:%.+]], i[[SZ]]* [[VLA1:%[^,]+]]
-// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[CELEMSIZE1]]
+// CHECK:       [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
 // CHECK:       [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
 
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
@@ -434,20 +443,18 @@ int bar(int n){
 
 // The names below are not necessarily consistent with the names used for the
 // addresses above as some are repeated.
-// CHECK-DAG:   [[BP0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8*
-// CHECK-DAG:   [[P0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8*
+// CHECK-DAG:   [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
+// CHECK-DAG:   [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
 // CHECK-DAG:   store i8* [[BP0]], i8** {{%[^,]+}}
 // CHECK-DAG:   store i8* [[P0]], i8** {{%[^,]+}}
 // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
 
-// CHECK-DAG:   [[BP1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8*
-// CHECK-DAG:   [[P1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8*
-// CHECK-DAG:   store i8* [[BP1]], i8** {{%[^,]+}}
-// CHECK-DAG:   store i8* [[P1]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
 // CHECK-DAG:   store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
 
-// CHECK-DAG:   [[BP2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
-// CHECK-DAG:   [[P2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[BP2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
+// CHECK-DAG:   [[P2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
 // CHECK-DAG:   store i8* [[BP2]], i8** {{%[^,]+}}
 // CHECK-DAG:   store i8* [[P2]], i8** {{%[^,]+}}
 // CHECK-DAG:   store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
@@ -488,15 +495,15 @@ int bar(int n){
 // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
 // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
 // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
-// CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
-// CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
+// CHECK-DAG:   [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
 
 // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
 // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
 // CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
 // CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
-// CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
-// CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
+// CHECK-DAG:   [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
 
 // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
 // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
@@ -540,15 +547,15 @@ int bar(int n){
 // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
 // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
 // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
-// CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
-// CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
+// CHECK-DAG:   [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
 
 // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
 // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
 // CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
 // CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
-// CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
-// CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
+// CHECK-DAG:   [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
 
 // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
 // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
@@ -580,65 +587,66 @@ int bar(int n){
 
 // CHECK:       define internal void [[HVT7]]
 // Create local storage for each capture.
-// CHECK-DAG:   [[LOCAL_THIS:%.+]] = alloca [[S1]]*
-// CHECK-DAG:   [[LOCAL_B:%.+]] = alloca i32*
-// CHECK-DAG:   [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]*
-// CHECK-DAG:   [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]*
-// CHECK-DAG:   [[LOCAL_C:%.+]] = alloca i16*
+// CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1]]*
+// CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_C:%.+]] = alloca i16*
 // CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
-// CHECK-DAG:   store i32* [[ARG_B:%.+]], i32** [[LOCAL_B]]
-// CHECK-DAG:   store i[[SZ]]* [[ARG_VLA1:%.+]], i[[SZ]]** [[LOCAL_VLA1]]
-// CHECK-DAG:   store i[[SZ]]* [[ARG_VLA2:%.+]], i[[SZ]]** [[LOCAL_VLA2]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
 // CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
 // Store captures in the context.
 // CHECK-DAG:   [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
-// CHECK-DAG:   [[REF_B:%.+]] = load i32*, i32** [[LOCAL_B]],
-// CHECK-DAG:   [[REF_VLA1:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA1]],
-// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA1]],
-// CHECK-DAG:   [[REF_VLA2:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA2]],
-// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA2]],
+// CHECK-64-DAG:[[REF_B:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
+// CHECK-DAG:   [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
+// CHECK-DAG:   [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
 // CHECK-DAG:   [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
 // Use captures.
 // CHECK-DAG:   getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0
-// CHECK-DAG:   load i32, i32* [[REF_B]]
+// CHECK-64-DAG:load i32, i32* [[REF_B]]
+// CHECK-32-DAG:load i32, i32* [[LOCAL_B]]
 // CHECK-DAG:   getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}}
 
 
 // CHECK:       define internal void [[HVT6]]
 // Create local storage for each capture.
-// CHECK-DAG:   [[LOCAL_A:%.+]] = alloca i32*
-// CHECK-DAG:   [[LOCAL_AA:%.+]] = alloca i16*
-// CHECK-DAG:   [[LOCAL_AAA:%.+]] = alloca i8*
-// CHECK-DAG:   [[LOCAL_B:%.+]] = alloca [10 x i32]*
-// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]]
-// CHECK-DAG:   store i16* [[ARG_AA:%.+]], i16** [[LOCAL_AA]]
-// CHECK-DAG:   store i8* [[ARG_AAA:%.+]], i8** [[LOCAL_AAA]]
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
 // CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
 // Store captures in the context.
-// CHECK-DAG:   [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]],
-// CHECK-DAG:   [[REF_AA:%.+]] = load i16*, i16** [[LOCAL_AA]],
-// CHECK-DAG:   [[REF_AAA:%.+]] = load i8*, i8** [[LOCAL_AAA]],
-// CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
+// CHECK-64-DAG:   [[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:      [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
+// CHECK-DAG:      [[REF_AAA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
+// CHECK-DAG:      [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
 // Use captures.
-// CHECK-DAG:   load i32, i32* [[REF_A]]
-// CHECK-DAG:   load i16, i16* [[REF_AA]]
-// CHECK-DAG:   load i8, i8* [[REF_AAA]]
-// CHECK-DAG:   getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
+// CHECK-64-DAG:   load i32, i32* [[REF_A]]
+// CHECK-DAG:      load i16, i16* [[REF_AA]]
+// CHECK-DAG:      load i8, i8* [[REF_AAA]]
+// CHECK-32-DAG:   load i32, i32* [[LOCAL_A]]
+// CHECK-DAG:      getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
 
 // CHECK:       define internal void [[HVT5]]
 // Create local storage for each capture.
-// CHECK-DAG:   [[LOCAL_A:%.+]] = alloca i32*
-// CHECK-DAG:   [[LOCAL_AA:%.+]] = alloca i16*
-// CHECK-DAG:   [[LOCAL_B:%.+]] = alloca [10 x i32]*
-// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]]
-// CHECK-DAG:   store i16* [[ARG_AA:%.+]], i16** [[LOCAL_AA]]
+// CHECK:       [[LOCAL_A:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_AA:%.+]] = alloca i[[SZ]]
+// CHECK:       [[LOCAL_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
+// CHECK-DAG:   store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
 // CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
 // Store captures in the context.
-// CHECK-DAG:   [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]],
-// CHECK-DAG:   [[REF_AA:%.+]] = load i16*, i16** [[LOCAL_AA]],
+// CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
+// CHECK-DAG:   [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
 // CHECK-DAG:   [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
 // Use captures.
-// CHECK-DAG:   load i32, i32* [[REF_A]]
+// CHECK-64-DAG:   load i32, i32* [[REF_A]]
+// CHECK-32-DAG:   load i32, i32* [[LOCAL_A]]
 // CHECK-DAG:   load i16, i16* [[REF_AA]]
 // CHECK-DAG:   getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
 #endif

Modified: cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp?rev=254521&r1=254520&r2=254521&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp (original)
+++ cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp Wed Dec  2 11:44:43 2015
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -41,27 +41,88 @@ int foo(short a, short b, short c, short
   static float Sc = 7.0;
   static float Sd = 8.0;
 
-  // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8*
-  // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]], align
-  // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8*
-  // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]], align
-  // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8*
-  // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]], align
-  // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align
-  // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align
-  // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align
-  // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]], align
-  // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]], align
-  // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]], align
-  // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-  // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG:    [[VALLB:%.+]] = load i16, i16* [[LB]],
+  // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb,
+  // CHECK-DAG:    [[VALFB:%.+]] = load float, float* @_ZZ3foossssE2Sb,
+  // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc,
+  // CHECK-DAG:    [[VALLC:%.+]] = load i16, i16* [[LC]],
+  // CHECK-DAG:    [[VALFC:%.+]] = load float, float* @_ZZ3foossssE2Sc,
+  // CHECK-DAG:    [[VALLD:%.+]] = load i16, i16* [[LD]],
+  // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd,
+  // CHECK-DAG:    [[VALFD:%.+]] = load float, float* @_ZZ3foossssE2Sd,
+
+  // 3 local vars being captured.
+
+  // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
+  // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
+  // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
+  // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
+  // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
+  // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
+  // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
+  // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
+  // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
+  // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
+  // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
+  // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
+  // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
+  // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
+  // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
+  // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // 3 static vars being captured.
+
+  // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
+  // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
+  // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
+  // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
+  // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
+  // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]],
+  // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float*
+  // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]],
+  // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8*
+  // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]],
+  // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]],
+  // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float*
+  // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]],
+  // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8*
+  // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]],
+  // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // 3 static global vars being captured.
+
+  // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]],
+  // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double*
+  // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]],
+  // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8*
+  // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]],
+  // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]],
+  // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]],
+  // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double*
+  // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]],
+  // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8*
+  // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]],
+  // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]],
+  // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+  // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]],
+  // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double*
+  // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]],
+  // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8*
+  // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]],
+  // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]],
+  // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
   // CHECK: call i32 @__tgt_target
   // CHECK: call void [[OFFLOADF:@.+]](
   // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
@@ -71,7 +132,7 @@ int foo(short a, short b, short c, short
     Gb += 1.0;
     Sb += 1.0;
 
-    // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}})
+    // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}})
     // The parallel region only uses 3 captures.
     // CHECK:     call {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
     // CHECK:     call void @.omp_outlined.(i32* %{{.+}}, i32* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
@@ -106,45 +167,98 @@ int bar(short a, short b, short c, short
   // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}})
   // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]])
   // Capture a, b, c, d
+  // CHECK: [[ALLOCLA:%.+]] = alloca i16
+  // CHECK: [[ALLOCLB:%.+]] = alloca i16
+  // CHECK: [[ALLOCLC:%.+]] = alloca i16
+  // CHECK: [[ALLOCLD:%.+]] = alloca i16
+  // CHECK: [[LLA:%.+]] = load i16*, i16** [[ALLOCLA]],
+  // CHECK: [[LLB:%.+]] = load i16*, i16** [[ALLOCLB]],
+  // CHECK: [[LLC:%.+]] = load i16*, i16** [[ALLOCLC]],
+  // CHECK: [[LLD:%.+]] = load i16*, i16** [[ALLOCLD]],
   #pragma omp parallel
   {
-    // CHECK: [[ADRA:%.+]] = alloca i16*, align
-    // CHECK: [[ADRB:%.+]] = alloca i16*, align
-    // CHECK: [[ADRC:%.+]] = alloca i16*, align
-    // CHECK: [[ADRD:%.+]] = alloca i16*, align
-    // CHECK: store i16* [[A]], i16** [[ADRA]], align
-    // CHECK: store i16* [[B]], i16** [[ADRB]], align
-    // CHECK: store i16* [[C]], i16** [[ADRC]], align
-    // CHECK: store i16* [[D]], i16** [[ADRD]], align
-    // CHECK: [[REFA:%.+]] = load i16*, i16** [[ADRA]],
-    // CHECK: [[REFB:%.+]] = load i16*, i16** [[ADRB]],
-    // CHECK: [[REFC:%.+]] = load i16*, i16** [[ADRC]],
-    // CHECK: [[REFD:%.+]] = load i16*, i16** [[ADRD]],
-
-    // CHECK: load float, float* [[BA]]
-
-    // CHECK-DAG: [[CSTB:%.+]] = bitcast i16* [[REFB]] to i8*
-    // CHECK-DAG: [[CSTC:%.+]] = bitcast i16* [[REFC]] to i8*
-    // CHECK-DAG: [[CSTD:%.+]] = bitcast i16* [[REFD]] to i8*
-    // CHECK-DAG: store i8* [[CSTB]], i8** [[GEPB:%.+]], align
-    // CHECK-DAG: store i8* [[CSTC]], i8** [[GEPC:%.+]], align
-    // CHECK-DAG: store i8* [[CSTD]], i8** [[GEPD:%.+]], align
-    // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align
-    // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align
-    // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align
-    // CHECK-DAG: store i8* bitcast (float* [[BB]] to i8*), i8** [[GEPBB:%.+]], align
-    // CHECK-DAG: store i8* bitcast (float* [[BC]] to i8*), i8** [[GEPBC:%.+]], align
-    // CHECK-DAG: store i8* bitcast (float* [[BD]] to i8*), i8** [[GEPBD:%.+]], align
-
-    // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPBB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPBC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
-    // CHECK-DAG: [[GEPBD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG:    [[VALLB:%.+]] = load i16, i16* [[LLB]],
+    // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb,
+    // CHECK-DAG:    [[VALFB:%.+]] = load float, float* @_ZZ3barssssE2Sb,
+    // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc,
+    // CHECK-DAG:    [[VALLC:%.+]] = load i16, i16* [[LLC]],
+    // CHECK-DAG:    [[VALFC:%.+]] = load float, float* @_ZZ3barssssE2Sc,
+    // CHECK-DAG:    [[VALLD:%.+]] = load i16, i16* [[LLD]],
+    // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd,
+    // CHECK-DAG:    [[VALFD:%.+]] = load float, float* @_ZZ3barssssE2Sd,
+
+    // 3 local vars being captured.
+
+    // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
+    // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
+    // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
+    // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
+    // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
+    // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
+    // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
+    // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
+    // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
+    // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
+    // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
+    // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
+    // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
+    // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
+    // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
+    // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // 3 static vars being captured.
+
+    // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
+    // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
+    // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
+    // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
+    // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
+    // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]],
+    // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float*
+    // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]],
+    // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8*
+    // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]],
+    // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]],
+    // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float*
+    // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]],
+    // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8*
+    // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]],
+    // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // 3 static global vars being captured.
+
+    // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]],
+    // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double*
+    // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]],
+    // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8*
+    // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]],
+    // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]],
+    // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]],
+    // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double*
+    // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]],
+    // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8*
+    // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]],
+    // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]],
+    // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+    // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]],
+    // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double*
+    // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]],
+    // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8*
+    // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]],
+    // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]],
+    // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
     // CHECK: call i32 @__tgt_target
     // CHECK: call void [[OFFLOADF:@.+]](
     // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
@@ -154,7 +268,7 @@ int bar(short a, short b, short c, short
       Gb += 1.0;
       Sb += 1.0;
 
-      // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}})
+      // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}})
       // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}})
 
       // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}})

Added: cfe/trunk/test/OpenMP/target_map_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_map_codegen.cpp?rev=254521&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_map_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_map_codegen.cpp Wed Dec  2 11:44:43 2015
@@ -0,0 +1,1015 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+///
+/// Implicit maps.
+///
+
+///==========================================================================///
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+// CK1-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK1-LABEL: implicit_maps_integer
+void implicit_maps_integer (int a){
+  int i = a;
+
+  // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK1-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK1-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK1-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK1-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK1-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK1-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK1-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK1: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+   ++i;
+  }
+}
+
+// CK1: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK1: [[ADDR:%.+]] = alloca i[[sz]],
+// CK1: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK1-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK1-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK1-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK2-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK2-LABEL: implicit_maps_integer_reference
+void implicit_maps_integer_reference (int a){
+  int &i = a;
+  // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK2-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK2-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK2-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK2-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK2-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK2-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK2-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK2-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK2: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+   ++i;
+  }
+}
+
+// CK2: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK2: [[ADDR:%.+]] = alloca i[[sz]],
+// CK2: [[REF:%.+]] = alloca i32*,
+// CK2: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK2-64: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+// CK2-64: store i32* [[CADDR]], i32** [[REF]],
+// CK2-64: [[RVAL:%.+]] = load i32*, i32** [[REF]],
+// CK2-64: {{.+}} = load i32, i32* [[RVAL]],
+// CK2-32: store i32* [[ADDR]], i32** [[REF]],
+// CK2-32: [[RVAL:%.+]] = load i32*, i32** [[REF]],
+// CK2-32: {{.+}} = load i32, i32* [[RVAL]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK3-LABEL: implicit_maps_parameter
+void implicit_maps_parameter (int a){
+
+  // CK3-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK3-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK3-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK3-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK3-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK3-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK3-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK3-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK3: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+   ++a;
+  }
+}
+
+// CK3: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK3: [[ADDR:%.+]] = alloca i[[sz]],
+// CK3: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK3-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK3-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK3-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
+#ifdef CK4
+
+// CK4-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK4-LABEL: implicit_maps_nested_integer
+void implicit_maps_nested_integer (int a){
+  int i = a;
+
+  // The captures in parallel are by reference. Only the capture in target is by
+  // copy.
+
+  // CK4: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP1:@.+]] to void (i32*, i32*, ...)*), i32* {{.+}})
+  // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}})
+  #pragma omp parallel
+  {
+    // CK4-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+    // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+    // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+    // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+    // CK4-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+    // CK4-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+    // CK4-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+    // CK4-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+    // CK4-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+    // CK4-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+    // CK4-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+    // CK4-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+    // CK4: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+    #pragma omp target
+    {
+      #pragma omp parallel
+      {
+        ++i;
+      }
+    }
+  }
+}
+
+// CK4: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK4: [[ADDR:%.+]] = alloca i[[sz]],
+// CK4: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK4-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK4-64: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP2:@.+]] to void (i32*, i32*, ...)*), i32* [[CADDR]])
+// CK4-32: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP2:@.+]] to void (i32*, i32*, ...)*), i32* [[ADDR]])
+// CK4: define internal void [[KERNELP2]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}})
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64
+// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-64
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
+// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
+#ifdef CK5
+
+// CK5-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK5-LABEL: implicit_maps_nested_integer_and_enum
+void implicit_maps_nested_integer_and_enum (int a){
+  enum Bla {
+    SomeEnum = 0x09
+  };
+
+  // Using an enum should not change the mapping information.
+  int  i = a;
+
+  // CK5-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK5-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK5-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK5-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK5-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK5-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK5-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK5-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK5-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK5: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+    ++i;
+    i += SomeEnum;
+  }
+}
+
+// CK5: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK5: [[ADDR:%.+]] = alloca i[[sz]],
+// CK5: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK5-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK5-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK5-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-64
+// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-64
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
+// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
+#ifdef CK6
+// CK6-DAG: [[GBL:@Gi]] = global i32 0
+// CK6-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK6-LABEL: implicit_maps_host_global
+int Gi;
+void implicit_maps_host_global (int a){
+  // CK6-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK6-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK6-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK6-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK6-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK6-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK6-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK6-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK6-64-DAG: store i32 [[GBLVAL:%.+]], i32* [[CADDR]],
+  // CK6-64-DAG: [[GBLVAL]] = load i32, i32* [[GBL]],
+  // CK6-32-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[GBLVAL:%.+]],
+
+  // CK6: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+    ++Gi;
+  }
+}
+
+// CK6: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK6: [[ADDR:%.+]] = alloca i[[sz]],
+// CK6: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK6-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK6-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK6-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
+// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
+#ifdef CK7
+
+// For a 32-bit targets, the value doesn't fit the size of the pointer,
+// therefore it is passed by reference with a map 'to' specification.
+
+// CK7-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+// Map types: OMP_MAP_TO = 1
+// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1]
+
+// CK7-LABEL: implicit_maps_double
+void implicit_maps_double (int a){
+  double d = (double)a;
+
+  // CK7-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK7-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+
+  // CK7-64-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK7-64-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK7-64-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK7-64-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK7-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK7-64-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to double*
+  // CK7-64-64-DAG: store double {{.+}}, double* [[CADDR]],
+
+  // CK7-32-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK7-32-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK7-32-DAG: [[VALBP]] = bitcast double* [[DECL:%.+]] to i8*
+  // CK7-32-DAG: [[VALP]] = bitcast double* [[DECL]] to i8*
+
+  // CK7-64: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  // CK7-32: call void [[KERNEL:@.+]](double* [[DECL]])
+  #pragma omp target
+  {
+    d += 1.0;
+  }
+}
+
+// CK7-64: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK7-64: [[ADDR:%.+]] = alloca i[[sz]],
+// CK7-64: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK7-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to double*
+// CK7-64: {{.+}} = load double, double* [[CADDR]],
+
+// CK7-32: define internal void [[KERNEL]](double* {{.+}}[[ARG:%.+]])
+// CK7-32: [[ADDR:%.+]] = alloca double*,
+// CK7-32: store double* [[ARG]], double** [[ADDR]],
+// CK7-32: [[REF:%.+]] = load double*, double** [[ADDR]],
+// CK7-32: {{.+}} = load double, double* [[REF]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
+#ifdef CK8
+
+// CK8-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+// CK8-LABEL: implicit_maps_float
+void implicit_maps_float (int a){
+  float f = (float)a;
+
+  // CK8-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK8-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK8-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK8-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK8-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK8-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK8-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK8-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to float*
+  // CK8-DAG: store float {{.+}}, float* [[CADDR]],
+
+  // CK8: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  #pragma omp target
+  {
+    f += 1.0;
+  }
+}
+
+// CK8: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK8: [[ADDR:%.+]] = alloca i[[sz]],
+// CK8: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK8: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to float*
+// CK8: {{.+}} = load float, float* [[CADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
+#ifdef CK9
+
+// CK9-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
+// Map types: OMP_MAP_TO + OMP_MAP_FROM = 2 + 1
+// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 3]
+
+// CK9-LABEL: implicit_maps_array
+void implicit_maps_array (int a){
+  double darr[2] = {(double)a, (double)a};
+
+  // CK9-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK9-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK9-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK9-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK9-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK9-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK9-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK9-DAG: [[VALBP]] = bitcast [2 x double]* [[DECL:%.+]] to i8*
+  // CK9-DAG: [[VALP]] = bitcast [2 x double]* [[DECL]] to i8*
+
+  // CK9: call void [[KERNEL:@.+]]([2 x double]* [[DECL]])
+  #pragma omp target
+  {
+    darr[0] += 1.0;
+    darr[1] += 1.0;
+  }
+}
+
+// CK9: define internal void [[KERNEL]]([2 x double]* {{.+}}[[ARG:%.+]])
+// CK9: [[ADDR:%.+]] = alloca [2 x double]*,
+// CK9: store [2 x double]* [[ARG]], [2 x double]** [[ADDR]],
+// CK9: [[REF:%.+]] = load [2 x double]*, [2 x double]** [[ADDR]],
+// CK9: {{.+}} = getelementptr inbounds [2 x double], [2 x double]* [[REF]], i[[sz]] 0, i[[sz]] 0
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
+#ifdef CK10
+
+// CK10-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}]
+// Map types: OMP_MAP_BYCOPY | OMP_MAP_PTR = 128 + 32
+// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 160]
+
+// CK10-LABEL: implicit_maps_pointer
+void implicit_maps_pointer (){
+  double *ddyn;
+
+  // CK10-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK10-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK10-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK10-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK10-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK10-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK10-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK10-DAG: [[VALBP]] = bitcast double* [[PTR:%.+]] to i8*
+  // CK10-DAG: [[VALP]] = bitcast double* [[PTR]] to i8*
+
+  // CK10: call void [[KERNEL:@.+]](double* [[PTR]])
+  #pragma omp target
+  {
+    ddyn[0] += 1.0;
+    ddyn[1] += 1.0;
+  }
+}
+
+// CK10: define internal void [[KERNEL]](double* {{.*}}[[ARG:%.+]])
+// CK10: [[ADDR:%.+]] = alloca double*,
+// CK10: store double* [[ARG]], double** [[ADDR]],
+// CK10: [[REF:%.+]] = load double*, double** [[ADDR]],
+// CK10: {{.+}} = getelementptr inbounds double, double* [[REF]], i[[sz]] 0
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
+#ifdef CK11
+
+// CK11-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
+// Map types: OMP_MAP_TO = 1
+// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1]
+
+// CK11-LABEL: implicit_maps_double_complex
+void implicit_maps_double_complex (int a){
+  double _Complex dc = (double)a;
+
+  // CK11-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK11-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK11-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK11-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK11-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK11-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK11-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK11-DAG: [[VALBP]] = bitcast { double, double }* [[PTR:%.+]] to i8*
+  // CK11-DAG: [[VALP]] = bitcast { double, double }* [[PTR]] to i8*
+
+  // CK11: call void [[KERNEL:@.+]]({ double, double }* [[PTR]])
+  #pragma omp target
+  {
+   dc *= dc;
+  }
+}
+
+// CK11: define internal void [[KERNEL]]({ double, double }* {{.*}}[[ARG:%.+]])
+// CK11: [[ADDR:%.+]] = alloca { double, double }*,
+// CK11: store { double, double }* [[ARG]], { double, double }** [[ADDR]],
+// CK11: [[REF:%.+]] = load { double, double }*, { double, double }** [[ADDR]],
+// CK11: {{.+}} = getelementptr inbounds { double, double }, { double, double }* [[REF]], i32 0, i32 0
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-64
+// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-64
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
+// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
+#ifdef CK12
+
+// For a 32-bit targets, the value doesn't fit the size of the pointer,
+// therefore it is passed by reference with a map 'to' specification.
+
+// CK12-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8]
+// Map types: OMP_MAP_BYCOPY = 128
+// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+// Map types: OMP_MAP_TO = 1
+// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1]
+
+// CK12-LABEL: implicit_maps_float_complex
+void implicit_maps_float_complex (int a){
+  float _Complex fc = (float)a;
+
+  // CK12-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK12-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK12-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK12-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+
+  // CK12-64-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK12-64-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK12-64-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK12-64-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK12-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK12-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to { float, float }*
+  // CK12-64-DAG: store { float, float } {{.+}}, { float, float }* [[CADDR]],
+
+  // CK12-32-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK12-32-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK12-32-DAG: [[VALBP]] = bitcast { float, float }* [[DECL:%.+]] to i8*
+  // CK12-32-DAG: [[VALP]] = bitcast { float, float }* [[DECL]] to i8*
+
+  // CK12-64: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  // CK12-32: call void [[KERNEL:@.+]]({ float, float }* [[DECL]])
+  #pragma omp target
+  {
+    fc *= fc;
+  }
+}
+
+// CK12-64: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK12-64: [[ADDR:%.+]] = alloca i[[sz]],
+// CK12-64: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK12-64: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to { float, float }*
+// CK12-64: {{.+}} = getelementptr inbounds { float, float }, { float, float }* [[CADDR]], i32 0, i32 0
+
+// CK12-32: define internal void [[KERNEL]]({ float, float }* {{.+}}[[ARG:%.+]])
+// CK12-32: [[ADDR:%.+]] = alloca { float, float }*,
+// CK12-32: store { float, float }* [[ARG]], { float, float }** [[ADDR]],
+// CK12-32: [[REF:%.+]] = load { float, float }*, { float, float }** [[ADDR]],
+// CK12-32: {{.+}} = getelementptr inbounds { float, float }, { float, float }* [[REF]], i32 0, i32 0
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
+#ifdef CK13
+
+// We don't have a constant map size for VLAs.
+// Map types:
+//  - OMP_MAP_BYCOPY = 128 (vla size)
+//  - OMP_MAP_BYCOPY = 128 (vla size)
+//  - OMP_MAP_TO + OMP_MAP_FROM = 2 + 1
+// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i32] [i32 128, i32 128, i32 3]
+
+// CK13-LABEL: implicit_maps_variable_length_array
+void implicit_maps_variable_length_array (int a){
+  double vla[2][a];
+
+  // CK13-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}})
+  // CK13-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK13-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK13-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0
+
+  // CK13-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK13-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK13-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 0
+  // CK13-DAG: store i8* inttoptr (i[[sz]] 2 to i8*), i8** [[BP0]],
+  // CK13-DAG: store i8* inttoptr (i[[sz]] 2 to i8*), i8** [[P0]],
+  // CK13-DAG: store i[[sz]] {{8|4}}, i[[sz]]* [[S0]],
+
+  // CK13-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+  // CK13-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+  // CK13-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 1
+  // CK13-DAG: store i8* [[VALBP1:%.+]], i8** [[BP1]],
+  // CK13-DAG: store i8* [[VALP1:%.+]], i8** [[P1]],
+  // CK13-DAG: [[VALBP1]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK13-DAG: [[VALP1]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK13-DAG: store i[[sz]] {{8|4}}, i[[sz]]* [[S1]],
+
+  // CK13-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
+  // CK13-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2
+  // CK13-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 2
+  // CK13-DAG: store i8* [[VALBP2:%.+]], i8** [[BP2]],
+  // CK13-DAG: store i8* [[VALP2:%.+]], i8** [[P2]],
+  // CK13-DAG: store i[[sz]] [[VALS2:%.+]], i[[sz]]* [[S2]],
+  // CK13-DAG: [[VALBP2]] = bitcast double* [[DECL:%.+]] to i8*
+  // CK13-DAG: [[VALP2]] = bitcast double* [[DECL]] to i8*
+  // CK13-DAG: [[VALS2]] = mul nuw i[[sz]] %{{.+}}, 8
+
+  // CK13: call void [[KERNEL:@.+]](i[[sz]] {{.+}}, i[[sz]] {{.+}}, double* [[DECL]])
+  #pragma omp target
+  {
+    vla[1][3] += 1.0;
+  }
+}
+
+// CK13: define internal void [[KERNEL]](i[[sz]] [[VLA0:%.+]], i[[sz]] [[VLA1:%.+]], double* {{.+}}[[ARG:%.+]])
+// CK13: [[ADDR0:%.+]] = alloca i[[sz]],
+// CK13: [[ADDR1:%.+]] = alloca i[[sz]],
+// CK13: [[ADDR2:%.+]] = alloca double*,
+// CK13: store i[[sz]] [[VLA0]], i[[sz]]* [[ADDR0]],
+// CK13: store i[[sz]] [[VLA1]], i[[sz]]* [[ADDR1]],
+// CK13: store double* [[ARG]], double** [[ADDR2]],
+// CK13: {{.+}} = load i[[sz]],  i[[sz]]* [[ADDR0]],
+// CK13: {{.+}} = load i[[sz]],  i[[sz]]* [[ADDR1]],
+// CK13: [[REF:%.+]] = load double*, double** [[ADDR2]],
+// CK13: {{.+}} = getelementptr inbounds double, double* [[REF]], i[[sz]] %{{.+}}
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-64
+// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-64
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
+// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
+#ifdef CK14
+
+// CK14-DAG: [[ST:%.+]] = type { i32, double }
+// CK14-DAG: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}, i{{64|32}} 4]
+// Map types:
+// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2
+// - OMP_MAP_BYCOPY = 128
+// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128]
+
+class SSS {
+public:
+  int a;
+  double b;
+
+  void foo(int c) {
+    #pragma omp target
+    {
+      a += c;
+      b += (double)c;
+    }
+  }
+
+  SSS(int a, double b) : a(a), b(b) {}
+};
+
+// CK14-LABEL: implicit_maps_class
+void implicit_maps_class (int a){
+  SSS sss(a, (double)a);
+
+  // CK14: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
+  // CK14-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK14-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK14-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+
+  // CK14-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK14-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK14-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]],
+  // CK14-DAG: store i8* [[VALP0:%.+]], i8** [[P0]],
+  // CK14-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8*
+  // CK14-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8*
+
+  // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+  // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+  // CK14-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK14-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK14-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK14-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK14-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK14-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK14-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK14: call void [[KERNEL:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}})
+  sss.foo(123);
+}
+
+// CK14: define internal void [[KERNEL]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]])
+// CK14: [[ADDR0:%.+]] = alloca [[ST]]*,
+// CK14: [[ADDR1:%.+]] = alloca i[[sz]],
+// CK14: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]],
+// CK14: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]],
+// CK14: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]],
+// CK14-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32*
+// CK14-64: {{.+}} = load i32,  i32* [[CADDR1]],
+// CK14-32: {{.+}} = load i32, i32* [[ADDR1]],
+// CK14: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-64
+// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-64
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
+// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
+#ifdef CK15
+
+// CK15: [[ST:%.+]] = type { i32, double, i32* }
+// CK15: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4]
+// Map types:
+// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2
+// - OMP_MAP_BYCOPY = 128
+// CK15: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128]
+
+// CK15: [[SIZES2:@.+]] = {{.+}}constant [2 x i[[sz]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4]
+// Map types:
+// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2
+// - OMP_MAP_BYCOPY = 128
+// CK15: [[TYPES2:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128]
+
+template<int x>
+class SSST {
+public:
+  int a;
+  double b;
+  int &r;
+
+  void foo(int c) {
+    #pragma omp target
+    {
+      a += c + x;
+      b += (double)(c + x);
+      r += x;
+    }
+  }
+  template<int y>
+  void bar(int c) {
+    #pragma omp target
+    {
+      a += c + x + y;
+      b += (double)(c + x + y);
+      r += x + y;
+    }
+  }
+
+  SSST(int a, double b, int &r) : a(a), b(b), r(r) {}
+};
+
+// CK15-LABEL: implicit_maps_templated_class
+void implicit_maps_templated_class (int a){
+  SSST<123> ssst(a, (double)a, a);
+
+  // CK15: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
+  // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+
+  // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK15-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]],
+  // CK15-DAG: store i8* [[VALP0:%.+]], i8** [[P0]],
+  // CK15-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8*
+  // CK15-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8*
+
+  // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+  // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+  // CK15-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK15-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK15-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK15-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK15: call void [[KERNEL:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}})
+  ssst.foo(456);
+
+  // CK15: define {{.*}}void @{{.+}}bar{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}})
+  // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}})
+  // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+
+  // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK15-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]],
+  // CK15-DAG: store i8* [[VALP0:%.+]], i8** [[P0]],
+  // CK15-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8*
+  // CK15-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8*
+
+  // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+  // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+  // CK15-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK15-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK15-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK15-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK15: call void [[KERNEL2:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}})
+  ssst.bar<210>(789);
+}
+
+// CK15: define internal void [[KERNEL]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]])
+// CK15: [[ADDR0:%.+]] = alloca [[ST]]*,
+// CK15: [[ADDR1:%.+]] = alloca i[[sz]],
+// CK15: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]],
+// CK15: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]],
+// CK15: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]],
+// CK15-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32*
+// CK15-64: {{.+}} = load i32,  i32* [[CADDR1]],
+// CK15-32: {{.+}} = load i32, i32* [[ADDR1]],
+// CK15: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0
+
+// CK15: define internal void [[KERNEL2]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]])
+// CK15: [[ADDR0:%.+]] = alloca [[ST]]*,
+// CK15: [[ADDR1:%.+]] = alloca i[[sz]],
+// CK15: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]],
+// CK15: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]],
+// CK15: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]],
+// CK15-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32*
+// CK15-64: {{.+}} = load i32,  i32* [[CADDR1]],
+// CK15-32: {{.+}} = load i32, i32* [[ADDR1]],
+// CK15: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-64
+// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-64
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
+// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
+#ifdef CK16
+
+// CK16-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types:
+// - OMP_MAP_BYCOPY = 128
+// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+template<int y>
+int foo(int d) {
+  int res = d;
+  #pragma omp target
+  {
+    res += y;
+  }
+  return res;
+}
+// CK16-LABEL: implicit_maps_templated_function
+void implicit_maps_templated_function (int a){
+  int i = a;
+
+  // CK16: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
+  // CK16-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+
+  // CK16-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK16-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK16-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK16-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK16-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK16-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK16-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK16-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK16-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK16: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  i = foo<543>(i);
+}
+// CK16: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK16: [[ADDR:%.+]] = alloca i[[sz]],
+// CK16: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK16-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK16-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK16-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
+#ifdef CK17
+
+// CK17-DAG: [[ST:%.+]] = type { i32, double }
+// CK17-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}]
+// Map types: OMP_MAP_TO + OMP_MAP_FROM = 2 + 1
+// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 3]
+
+class SSS {
+public:
+  int a;
+  double b;
+};
+
+// CK17-LABEL: implicit_maps_struct
+void implicit_maps_struct (int a){
+  SSS s = {a, (double)a};
+
+  // CK17-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+  // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK17-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK17-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK17-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK17-DAG: [[VALBP]] = bitcast [[ST]]* [[DECL:%.+]] to i8*
+  // CK17-DAG: [[VALP]] = bitcast [[ST]]* [[DECL]] to i8*
+
+  // CK17: call void [[KERNEL:@.+]]([[ST]]* [[DECL]])
+  #pragma omp target
+  {
+    s.a += 1;
+    s.b += 1.0;
+  }
+}
+
+// CK17: define internal void [[KERNEL]]([[ST]]* {{.+}}[[ARG:%.+]])
+// CK17: [[ADDR:%.+]] = alloca [[ST]]*,
+// CK17: store [[ST]]* [[ARG]], [[ST]]** [[ADDR]],
+// CK17: [[REF:%.+]] = load [[ST]]*, [[ST]]** [[ADDR]],
+// CK17: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF]], i32 0, i32 0
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64
+// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-64
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
+// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
+#ifdef CK18
+
+// CK18-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// Map types:
+// - OMP_MAP_BYCOPY = 128
+// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128]
+
+template<typename T>
+int foo(T d) {
+  #pragma omp target
+  {
+    d += (T)1;
+  }
+  return d;
+}
+// CK18-LABEL: implicit_maps_template_type_capture
+void implicit_maps_template_type_capture (int a){
+  int i = a;
+
+  // CK18: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}})
+  // CK18-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}})
+  // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+  // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+
+  // CK18-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+  // CK18-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+  // CK18-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]],
+  // CK18-DAG: store i8* [[VALP:%.+]], i8** [[P1]],
+  // CK18-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK18-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8*
+  // CK18-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
+  // CK18-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
+  // CK18-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
+
+  // CK18: call void [[KERNEL:@.+]](i[[sz]] [[VAL]])
+  i = foo(i);
+}
+// CK18: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]])
+// CK18: [[ADDR:%.+]] = alloca i[[sz]],
+// CK18: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]],
+// CK18-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32*
+// CK18-64: {{.+}} = load i32, i32* [[CADDR]],
+// CK18-32: {{.+}} = load i32, i32* [[ADDR]],
+
+#endif
+#endif




More information about the cfe-commits mailing list