r345609 - [OPENMP] Support for mapping of the lambdas in target regions.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 30 08:50:12 PDT 2018


Author: abataev
Date: Tue Oct 30 08:50:12 2018
New Revision: 345609

URL: http://llvm.org/viewvc/llvm-project?rev=345609&view=rev
Log:
[OPENMP] Support for mapping of the lambdas in target regions.

Added support for mapping of lambdas in the target regions. It scans all
the captures by reference in the lambda, implicitly maps those variables
in the target region and then later reinstate the addresses of
references in lambda to the correct addresses of the captured|privatized
variables.

Added:
    cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Oct 30 08:50:12 2018
@@ -7532,6 +7532,76 @@ public:
     }
   }
 
+  /// Emit capture info for lambdas for variables captured by reference.
+  void generateInfoForLambdaCaptures(const ValueDecl *VD, llvm::Value *Arg,
+                                     MapBaseValuesArrayTy &BasePointers,
+                                     MapValuesArrayTy &Pointers,
+                                     MapValuesArrayTy &Sizes,
+                                     MapFlagsArrayTy &Types) const {
+    const auto *RD = VD->getType()
+                         .getCanonicalType()
+                         .getNonReferenceType()
+                         ->getAsCXXRecordDecl();
+    if (!RD || !RD->isLambda())
+      return;
+    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
+    LValue VDLVal = CGF.MakeAddrLValue(
+        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+    FieldDecl *ThisCapture = nullptr;
+    RD->getCaptureFields(Captures, ThisCapture);
+    if (ThisCapture) {
+      LValue ThisLVal =
+          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+      BasePointers.push_back(VDLVal.getPointer());
+      Pointers.push_back(ThisLVal.getPointer());
+      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
+      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+    }
+    for (const LambdaCapture &LC : RD->captures()) {
+      if (LC.getCaptureKind() != LCK_ByRef)
+        continue;
+      const VarDecl *VD = LC.getCapturedVar();
+      auto It = Captures.find(VD);
+      assert(It != Captures.end() && "Found lambda capture without field.");
+      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+      BasePointers.push_back(VDLVal.getPointer());
+      Pointers.push_back(VarLVal.getPointer());
+      Sizes.push_back(CGF.getTypeSize(
+          VD->getType().getCanonicalType().getNonReferenceType()));
+      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+    }
+  }
+
+  /// Set correct indices for lambdas captures.
+  void adjustMemberOfForLambdaCaptures(MapBaseValuesArrayTy &BasePointers,
+                                       MapValuesArrayTy &Pointers,
+                                       MapFlagsArrayTy &Types) const {
+    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
+      // Set correct member_of idx for all implicit lambda captures.
+      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
+        continue;
+      llvm::Value *BasePtr = *BasePointers[I];
+      int TgtIdx = -1;
+      for (unsigned J = I; J > 0; --J) {
+        unsigned Idx = J - 1;
+        if (Pointers[Idx] != BasePtr)
+          continue;
+        TgtIdx = Idx;
+        break;
+      }
+      assert(TgtIdx != -1 && "Unable to find parent lambda.");
+      // All other current entries will be MEMBER_OF the combined entry
+      // (except for PTR_AND_OBJ entries which do not have a placeholder value
+      // 0xFFFF in the MEMBER_OF field).
+      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
+      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+    }
+  }
+
   /// Generate the base pointers, section pointers, sizes and map types
   /// associated to a given capture.
   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
@@ -8133,6 +8203,12 @@ void CGOpenMPRuntime::emitTargetCall(Cod
         if (CurBasePointers.empty())
           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                            CurPointers, CurSizes, CurMapTypes);
+        // Generate correct mapping for variables captured by reference in
+        // lambdas.
+        if (CI->capturesVariable())
+          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+                                                  CurBasePointers, CurPointers,
+                                                  CurSizes, CurMapTypes);
       }
       // We expect to have at least an element of information for this capture.
       assert(!CurBasePointers.empty() &&
@@ -8154,6 +8230,8 @@ void CGOpenMPRuntime::emitTargetCall(Cod
       Sizes.append(CurSizes.begin(), CurSizes.end());
       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
     }
+    // Adjust MEMBER_OF flags for the lambdas captures.
+    MEHandler.adjustMemberOfForLambdaCaptures(BasePointers, Pointers, MapTypes);
     // Map other list items in the map clause which are not captured variables
     // but "declare target link" global variables.
     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
@@ -8465,6 +8543,12 @@ void CGOpenMPRuntime::emitDeferredTarget
   }
 }
 
+void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+         " Expected target-based directive.");
+}
+
 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
     CodeGenModule &CGM)
     : CGM(CGM) {

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Tue Oct 30 08:50:12 2018
@@ -1543,6 +1543,12 @@ public:
 
   /// Emit deferred declare target variables marked for deferred emission.
   void emitDeferredTargetDecls() const;
+
+  /// Adjust some parameters for the target-based directives, like addresses of
+  /// the variables captured by reference in lambdas.
+  virtual void
+  adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
+                                     const OMPExecutableDirective &D) const;
 };
 
 /// Class supports emissionof SIMD-only code.

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Tue Oct 30 08:50:12 2018
@@ -4255,3 +4255,56 @@ void CGOpenMPRuntimeNVPTX::getDefaultSch
       CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
       SourceLocation());
 }
+
+void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+         " Expected target-based directive.");
+  const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
+  for (const CapturedStmt::Capture &C : CS->captures()) {
+    // Capture variables captured by reference in lambdas for target-based
+    // directives.
+    if (!C.capturesVariable())
+      continue;
+    const VarDecl *VD = C.getCapturedVar();
+    const auto *RD = VD->getType()
+                         .getCanonicalType()
+                         .getNonReferenceType()
+                         ->getAsCXXRecordDecl();
+    if (!RD || !RD->isLambda())
+      continue;
+    Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+    LValue VDLVal;
+    if (VD->getType().getCanonicalType()->isReferenceType())
+      VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
+    else
+      VDLVal = CGF.MakeAddrLValue(
+          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+    FieldDecl *ThisCapture = nullptr;
+    RD->getCaptureFields(Captures, ThisCapture);
+    if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
+      LValue ThisLVal =
+          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+      llvm::Value *CXXThis = CGF.LoadCXXThis();
+      CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
+    }
+    for (const LambdaCapture &LC : RD->captures()) {
+      if (LC.getCaptureKind() != LCK_ByRef)
+        continue;
+      const VarDecl *VD = LC.getCapturedVar();
+      if (!CS->capturesVariable(VD))
+        continue;
+      auto It = Captures.find(VD);
+      assert(It != Captures.end() && "Found lambda capture without field.");
+      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+      Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+      if (VD->getType().getCanonicalType()->isReferenceType())
+        VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
+                                               VD->getType().getCanonicalType())
+                     .getAddress();
+      CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
+    }
+  }
+}
+

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Tue Oct 30 08:50:12 2018
@@ -350,6 +350,11 @@ public:
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
       const Expr *&ChunkExpr) const override;
 
+  /// Adjust some parameters for the target-based directives, like addresses of
+  /// the variables captured by reference in lambdas.
+  void adjustTargetSpecificDataForLambdas(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Tue Oct 30 08:50:12 2018
@@ -1738,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFun
     CGF.EmitOMPReductionClauseInit(S, LoopScope);
     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
     (void)LoopScope.Privatize();
+    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                          S.getInc(),
                          [&S](CodeGenFunction &CGF) {
@@ -2296,6 +2298,8 @@ bool CodeGenFunction::EmitOMPWorksharing
       EmitOMPPrivateLoopCounters(S, LoopScope);
       EmitOMPLinearClause(S, LoopScope);
       (void)LoopScope.Privatize();
+      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
 
       // Detect the loop schedule kind and chunk.
       const Expr *ChunkExpr = nullptr;
@@ -2589,6 +2593,8 @@ void CodeGenFunction::EmitSections(const
     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
     CGF.EmitOMPReductionClauseInit(S, LoopScope);
     (void)LoopScope.Privatize();
+    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
 
     // Emit static non-chunked loop.
     OpenMPScheduleTy ScheduleKind;
@@ -3342,6 +3348,8 @@ void CodeGenFunction::EmitOMPDistributeL
       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
       EmitOMPPrivateLoopCounters(S, LoopScope);
       (void)LoopScope.Privatize();
+      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
 
       // Detect the distribute schedule kind and chunk.
       llvm::Value *Chunk = nullptr;
@@ -4071,6 +4079,8 @@ static void emitTargetRegion(CodeGenFunc
   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
   CGF.EmitOMPPrivateClause(S, PrivateScope);
   (void)PrivateScope.Privatize();
+  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
 
   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
 }
@@ -4151,6 +4161,8 @@ static void emitTargetTeamsRegion(CodeGe
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
     CGF.EmitStmt(CS->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
@@ -4709,6 +4721,8 @@ static void emitTargetParallelRegion(Cod
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
     // TODO: Add support for clauses.
     CGF.EmitStmt(CS->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=345609&r1=345608&r2=345609&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Tue Oct 30 08:50:12 2018
@@ -164,6 +164,9 @@ private:
   OpenMPClauseKind ClauseKindMode = OMPC_unknown;
   Sema &SemaRef;
   bool ForceCapturing = false;
+  /// true if all the vaiables in the target executable directives must be
+  /// captured by reference.
+  bool ForceCaptureByReferenceInTargetExecutable = false;
   CriticalsWithHintsTy Criticals;
 
   using iterator = StackTy::const_reverse_iterator;
@@ -195,6 +198,13 @@ public:
   bool isForceVarCapturing() const { return ForceCapturing; }
   void setForceVarCapturing(bool V) { ForceCapturing = V; }
 
+  void setForceCaptureByReferenceInTargetExecutable(bool V) {
+    ForceCaptureByReferenceInTargetExecutable = V;
+  }
+  bool isForceCaptureByReferenceInTargetExecutable() const {
+    return ForceCaptureByReferenceInTargetExecutable;
+  }
+
   void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName,
             Scope *CurScope, SourceLocation Loc) {
     if (Stack.empty() ||
@@ -1435,6 +1445,8 @@ bool Sema::isOpenMPCapturedByRef(const V
       // By default, all the data that has a scalar type is mapped by copy
       // (except for reduction variables).
       IsByRef =
+          (DSAStack->isForceCaptureByReferenceInTargetExecutable() &&
+           !Ty->isAnyPointerType()) ||
           !Ty->isScalarType() ||
           DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar ||
           DSAStack->hasExplicitDSA(
@@ -1444,10 +1456,12 @@ bool Sema::isOpenMPCapturedByRef(const V
 
   if (IsByRef && Ty.getNonReferenceType()->isScalarType()) {
     IsByRef =
-        !DSAStack->hasExplicitDSA(
-            D,
-            [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
-            Level, /*NotLastprivate=*/true) &&
+        ((DSAStack->isForceCaptureByReferenceInTargetExecutable() &&
+          !Ty->isAnyPointerType()) ||
+         !DSAStack->hasExplicitDSA(
+             D,
+             [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
+             Level, /*NotLastprivate=*/true)) &&
         // If the variable is artificial and must be captured by value - try to
         // capture by value.
         !(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() &&
@@ -1509,6 +1523,42 @@ VarDecl *Sema::isOpenMPCapturedDecl(Valu
       return VD;
     }
   }
+  // Capture variables captured by reference in lambdas for target-based
+  // directives.
+  if (VD && !DSAStack->isClauseParsingMode()) {
+    if (const auto *RD = VD->getType()
+                             .getCanonicalType()
+                             .getNonReferenceType()
+                             ->getAsCXXRecordDecl()) {
+      bool SavedForceCaptureByReferenceInTargetExecutable =
+          DSAStack->isForceCaptureByReferenceInTargetExecutable();
+      DSAStack->setForceCaptureByReferenceInTargetExecutable(/*V=*/true);
+      if (RD->isLambda())
+        for (const LambdaCapture &LC : RD->captures()) {
+          if (LC.getCaptureKind() == LCK_ByRef) {
+            VarDecl *VD = LC.getCapturedVar();
+            DeclContext *VDC = VD->getDeclContext();
+            if (!VDC->Encloses(CurContext))
+              continue;
+            DSAStackTy::DSAVarData DVarPrivate =
+                DSAStack->getTopDSA(VD, /*FromParent=*/false);
+            // Do not capture already captured variables.
+            if (!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
+                DVarPrivate.CKind == OMPC_unknown &&
+                !DSAStack->checkMappableExprComponentListsForDecl(
+                    D, /*CurrentRegionOnly=*/true,
+                    [](OMPClauseMappableExprCommon::
+                           MappableExprComponentListRef,
+                       OpenMPClauseKind) { return true; }))
+              MarkVariableReferenced(LC.getLocation(), LC.getCapturedVar());
+          } else if (LC.getCaptureKind() == LCK_This) {
+            CheckCXXThisCapture(LC.getLocation());
+          }
+        }
+      DSAStack->setForceCaptureByReferenceInTargetExecutable(
+          SavedForceCaptureByReferenceInTargetExecutable);
+    }
+  }
 
   if (DSAStack->getCurrentDirective() != OMPD_unknown &&
       (!DSAStack->isClauseParsingMode() ||

Added: cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp?rev=345609&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp Tue Oct 30 08:50:12 2018
@@ -0,0 +1,132 @@
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4, i64 4]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 544, i64 33, i64 673, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 800]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 547, i64 544, i64 547, i64 673, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968]
+// CHECK-DAG: [[S:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* }
+// CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* }
+
+// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker()
+// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}})
+// CLASS-NOT: getelementptr
+// CLASS: br i1 %
+// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker()
+// CLASS: br label %
+// CLASS: br i1 %
+// CLASS: call void @__kmpc_kernel_init(
+// CLASS: call void @__kmpc_data_sharing_init_stack()
+// CLASS: call void @llvm.memcpy.
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
+// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
+// CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]],
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
+// CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]])
+// CLASS: ret void
+
+// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l65([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
+// CLASS-NOT: getelementptr
+// CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}})
+// CLASS: ret void
+
+// CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
+// CLASS-NOT: getelementptr
+// CLASS: call void @llvm.memcpy.
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
+// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
+// CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]],
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
+// CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]])
+// CLASS: ret void
+
+struct S {
+  int a = 15;
+  int foo() {
+    auto &&L = [&]() { return a; };
+#pragma omp target
+    L();
+#pragma omp target parallel
+    L();
+    return a;
+  }
+} s;
+
+// FUN: define internal void @__omp_offloading_{{.+}}_main_l125_worker()
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l125(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: br i1 %
+// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l125_worker()
+// FUN: br label %
+// FUN: br i1 %
+// FUN: call void @__kmpc_kernel_init(
+// FUN: call void @__kmpc_data_sharing_init_stack()
+// FUN: call void @llvm.memcpy.
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
+// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
+// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
+// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
+// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
+// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
+// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
+// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
+// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
+// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
+// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
+// FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]])
+// FUN: ret void
+
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l127(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}} i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}})
+// FUN: ret void
+
+// FUN: define internal void [[PARALLEL:@.+]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: call void @llvm.memcpy.
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
+// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
+// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
+// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
+// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
+// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
+// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
+// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
+// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
+// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
+// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
+// FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]])
+// FUN: ret void
+
+int main(int argc, char **argv) {
+  int &b = argc;
+  int &&c = 1;
+  int *d = &argc;
+  int a;
+  auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; };
+#pragma omp target firstprivate(argc) map(to : a)
+  L();
+#pragma omp target parallel
+  L();
+  return argc + s.foo();
+}
+
+#endif // HEADER




More information about the cfe-commits mailing list