r267395 - [OPENMP 4.5] Codegen for 'taskloop' directive.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 25 05:22:35 PDT 2016


Author: abataev
Date: Mon Apr 25 07:22:29 2016
New Revision: 267395

URL: http://llvm.org/viewvc/llvm-project?rev=267395&view=rev
Log:
[OPENMP 4.5] Codegen for 'taskloop' directive.

The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed.
The next code will be generated for the taskloop directive:
    #pragma omp taskloop num_tasks(N) lastprivate(j)
        for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) {
          int th = omp_get_thread_num();
          #pragma omp atomic
            counter++;
          #pragma omp atomic
            th_counter[th]++;
          j = i;
    }

Generated code:
task = __kmpc_omp_task_alloc(NULL,gtid,1,sizeof(struct
task),sizeof(struct shar),&task_entry);
psh = task->shareds;
psh->pth_counter = &th_counter;
psh->pcounter = &counter;
psh->pj = &j;
task->lb = 0;
task->ub = N*GRAIN*STRIDE-2;
task->st = STRIDE;
__kmpc_taskloop(
NULL,             // location
gtid,             // gtid
task,             // task structure
1,                // if clause value
&task->lb,        // lower bound
&task->ub,        // upper bound
STRIDE,           // loop increment
0,                // 1 if nogroup specified
2,                // schedule type: 0-none, 1-grainsize, 2-num_tasks
N,                // schedule value (ignored for type 0)
(void*)&__task_dup_entry // tasks duplication routine
);

Added:
    cfe/trunk/test/OpenMP/taskloop_codegen.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/Sema/SemaOpenMP.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=267395&r1=267394&r2=267395&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Apr 25 07:22:29 2016
@@ -608,9 +608,13 @@ enum OpenMPRTLFunction {
   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
   // kmp_int32 num_teams, kmp_int32 thread_limit);
   OMPRTL__kmpc_push_num_teams,
-  /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
-  /// kmpc_micro microtask, ...);
+  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
+  // microtask, ...);
   OMPRTL__kmpc_fork_teams,
+  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+  // sched, kmp_uint64 grainsize, void *task_dup);
+  OMPRTL__kmpc_taskloop,
 
   //
   // Offloading related calls
@@ -842,10 +846,11 @@ llvm::Value *CGOpenMPRuntime::emitTaskOu
   assert(!ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 for tasks");
   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+  auto *TD = dyn_cast<OMPTaskDirective>(&D);
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTaskOutlinedRegionInfo CGInfo(
-      *CS, ThreadIDVar, CodeGen, InnermostKind,
-      cast<OMPTaskDirective>(D).hasCancel(), Action);
+  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
+                                        InnermostKind,
+                                        TD ? TD->hasCancel() : false, Action);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
   auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
   if (!Tied)
@@ -1433,6 +1438,26 @@ CGOpenMPRuntime::createRuntimeFunction(u
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
     break;
   }
+  case OMPRTL__kmpc_taskloop: {
+    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+    // sched, kmp_uint64 grainsize, void *task_dup);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+                                CGM.IntTy,
+                                CGM.VoidPtrTy,
+                                CGM.IntTy,
+                                CGM.Int64Ty->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo(),
+                                CGM.Int64Ty,
+                                CGM.IntTy,
+                                CGM.IntTy,
+                                CGM.Int64Ty,
+                                CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
+    break;
+  }
   case OMPRTL__tgt_target: {
     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
@@ -2492,6 +2517,14 @@ enum KmpTaskTFields {
   KmpTaskTPartId,
   /// \brief Function with call of destructors for private variables.
   KmpTaskTDestructors,
+  /// (Taskloops only) Lower bound.
+  KmpTaskTLowerBound,
+  /// (Taskloops only) Upper bound.
+  KmpTaskTUpperBound,
+  /// (Taskloops only) Stride.
+  KmpTaskTStride,
+  /// (Taskloops only) Is last iteration flag.
+  KmpTaskTLastIter,
 };
 } // anonymous namespace
 
@@ -2999,7 +3032,8 @@ createPrivatesRecordDecl(CodeGenModule &
 }
 
 static RecordDecl *
-createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
+createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
+                         QualType KmpInt32Ty,
                          QualType KmpRoutineEntryPointerQTy) {
   auto &C = CGM.getContext();
   // Build struct kmp_task_t {
@@ -3007,6 +3041,11 @@ createKmpTaskTRecordDecl(CodeGenModule &
   //         kmp_routine_entry_t routine;
   //         kmp_int32           part_id;
   //         kmp_routine_entry_t destructors;
+  // For taskloops additional fields:
+  //         kmp_uint64          lb;
+  //         kmp_uint64          ub;
+  //         kmp_int64           st;
+  //         kmp_int32           liter;
   //       };
   auto *RD = C.buildImplicitRecord("kmp_task_t");
   RD->startDefinition();
@@ -3014,6 +3053,16 @@ createKmpTaskTRecordDecl(CodeGenModule &
   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   addFieldToRecordDecl(C, RD, KmpInt32Ty);
   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+  if (isOpenMPTaskLoopDirective(Kind)) {
+    QualType KmpUInt64Ty =
+        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+    QualType KmpInt64Ty =
+        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+    addFieldToRecordDecl(C, RD, KmpInt64Ty);
+    addFieldToRecordDecl(C, RD, KmpInt32Ty);
+  }
   RD->completeDefinition();
   return RD;
 }
@@ -3041,13 +3090,16 @@ createKmpTaskTWithPrivatesRecordDecl(Cod
 /// \code
 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
+///   For taskloops:
+///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
 ///   tt->shareds);
 ///   return 0;
 /// }
 /// \endcode
 static llvm::Value *
 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
-                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
+                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
+                      QualType KmpTaskTWithPrivatesPtrQTy,
                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
                       llvm::Value *TaskPrivatesMap) {
@@ -3071,7 +3123,10 @@ emitProxyTaskFunction(CodeGenModule &CGM
   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
 
   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
-  // tt, tt->task_data.shareds);
+  // tt,
+  // For taskloops:
+  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
+  // tt->task_data.shareds);
   auto *GtidParam = CGF.EmitLoadOfScalar(
       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
   LValue TDBase = CGF.EmitLoadOfPointerLValue(
@@ -3098,16 +3153,37 @@ emitProxyTaskFunction(CodeGenModule &CGM
     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
-  } else {
+  } else
     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+
+  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
+                               TaskPrivatesMap,
+                               CGF.Builder
+                                   .CreatePointerBitCastOrAddrSpaceCast(
+                                       TDBase.getAddress(), CGF.VoidPtrTy)
+                                   .getPointer()};
+  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
+                                          std::end(CommonArgs));
+  if (isOpenMPTaskLoopDirective(Kind)) {
+    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
+    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
+    auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
+    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
+    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
+    auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
+    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
+    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
+    auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
+    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
+    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
+    auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+    CallArgs.push_back(LBParam);
+    CallArgs.push_back(UBParam);
+    CallArgs.push_back(StParam);
+    CallArgs.push_back(LIParam);
   }
+  CallArgs.push_back(SharedsParam);
 
-  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
-                             TaskPrivatesMap,
-                             CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-                                            TDBase.getAddress(), CGF.VoidPtrTy)
-                                 .getPointer(),
-                             SharedsParam};
   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   CGF.EmitStoreThroughLValue(
       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
@@ -3244,20 +3320,17 @@ static int array_pod_sort_comparator(con
   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
 }
 
-void CGOpenMPRuntime::emitTaskCall(
+CGOpenMPRuntime::TaskDataTy CGOpenMPRuntime::emitTaskInit(
     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
     unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
-    Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+    Address Shareds, ArrayRef<const Expr *> PrivateVars,
     ArrayRef<const Expr *> PrivateCopies,
     ArrayRef<const Expr *> FirstprivateVars,
     ArrayRef<const Expr *> FirstprivateCopies,
-    ArrayRef<const Expr *> FirstprivateInits,
-    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
-  if (!CGF.HaveInsertPoint())
-    return;
+    ArrayRef<const Expr *> FirstprivateInits) {
   auto &C = CGM.getContext();
-  llvm::SmallVector<PrivateDataTy, 8> Privates;
+  llvm::SmallVector<PrivateDataTy, 4> Privates;
   // Aggregate privates and sort them by the alignment.
   auto I = PrivateCopies.begin();
   for (auto *E : PrivateVars) {
@@ -3287,8 +3360,8 @@ void CGOpenMPRuntime::emitTaskCall(
   emitKmpRoutineEntryT(KmpInt32Ty);
   // Build type kmp_task_t (if not built yet).
   if (KmpTaskTQTy.isNull()) {
-    KmpTaskTQTy = C.getRecordType(
-        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
+    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
+        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
   }
   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
   // Build particular struct kmp_task_t for the given task.
@@ -3321,8 +3394,9 @@ void CGOpenMPRuntime::emitTaskCall(
   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
   // kmp_task_t *tt);
   auto *TaskEntry = emitProxyTaskFunction(
-      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
-      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
+      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
+      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
+      TaskPrivatesMap);
 
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
@@ -3454,7 +3528,38 @@ void CGOpenMPRuntime::emitTaskCall(
   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             DestructorFn, KmpRoutineEntryPtrTy),
                         Destructor);
+  TaskDataTy Data;
+  Data.NewTask = NewTask;
+  Data.TaskEntry = TaskEntry;
+  Data.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
+  Data.TDBase = TDBase;
+  Data.KmpTaskTQTyRD = KmpTaskTQTyRD;
+  return Data;
+}
 
+void CGOpenMPRuntime::emitTaskCall(
+    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+    unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+    Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+    ArrayRef<const Expr *> PrivateCopies,
+    ArrayRef<const Expr *> FirstprivateVars,
+    ArrayRef<const Expr *> FirstprivateCopies,
+    ArrayRef<const Expr *> FirstprivateInits,
+    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  TaskDataTy Data =
+      emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction,
+                   SharedsTy, Shareds, PrivateVars, PrivateCopies,
+                   FirstprivateVars, FirstprivateCopies, FirstprivateInits);
+  llvm::Value *NewTask = Data.NewTask;
+  llvm::Value *TaskEntry = Data.TaskEntry;
+  llvm::Value *NewTaskNewTaskTTy = Data.NewTaskNewTaskTTy;
+  LValue TDBase = Data.TDBase;
+  RecordDecl *KmpTaskTQTyRD = Data.KmpTaskTQTyRD;
+  auto &C = CGM.getContext();
   // Process list of dependences.
   Address DependenciesArray = Address::invalid();
   unsigned NumDependencies = Dependences.size();
@@ -3629,6 +3734,71 @@ void CGOpenMPRuntime::emitTaskCall(
   }
 }
 
+void CGOpenMPRuntime::emitTaskLoopCall(
+    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup,
+    unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+    Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+    ArrayRef<const Expr *> PrivateCopies,
+    ArrayRef<const Expr *> FirstprivateVars,
+    ArrayRef<const Expr *> FirstprivateCopies,
+    ArrayRef<const Expr *> FirstprivateInits) {
+  if (!CGF.HaveInsertPoint())
+    return;
+  TaskDataTy Data =
+      emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction,
+                   SharedsTy, Shareds, PrivateVars, PrivateCopies,
+                   FirstprivateVars, FirstprivateCopies, FirstprivateInits);
+  // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
+  // libcall.
+  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+  // sched, kmp_uint64 grainsize, void *task_dup);
+  llvm::Value *ThreadID = getThreadID(CGF, Loc);
+  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+  llvm::Value *IfVal;
+  if (IfCond) {
+    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
+                                      /*isSigned=*/true);
+  } else
+    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
+
+  LValue LBLVal = CGF.EmitLValueForField(
+      Data.TDBase,
+      *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
+  auto *LBVar =
+      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
+  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
+                       /*IsInitializer=*/true);
+  LValue UBLVal = CGF.EmitLValueForField(
+      Data.TDBase,
+      *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
+  auto *UBVar =
+      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
+  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
+                       /*IsInitializer=*/true);
+  LValue StLVal = CGF.EmitLValueForField(
+      Data.TDBase,
+      *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
+  auto *StVar =
+      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
+  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
+                       /*IsInitializer=*/true);
+  llvm::Value *TaskArgs[] = {
+      UpLoc,
+      ThreadID,
+      Data.NewTask,
+      IfVal,
+      LBLVal.getPointer(),
+      UBLVal.getPointer(),
+      CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+      llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0),
+      llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0),
+      llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+      llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
+}
+
 /// \brief Emit reduction operation for each element of array (required for
 /// array sections) LHS op = RHS.
 /// \param Type Type of array.

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=267395&r1=267394&r2=267395&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Apr 25 07:22:29 2016
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 
+#include "CGValue.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
@@ -37,6 +38,7 @@ namespace clang {
 class Expr;
 class GlobalDecl;
 class OMPExecutableDirective;
+class OMPLoopDirective;
 class VarDecl;
 class OMPDeclareReductionDecl;
 class IdentifierInfo;
@@ -431,6 +433,64 @@ private:
   ///
   llvm::Value *getCriticalRegionLock(StringRef CriticalName);
 
+  struct TaskDataTy {
+    llvm::Value *NewTask;
+    llvm::Value *TaskEntry;
+    llvm::Value *NewTaskNewTaskTTy;
+    LValue TDBase;
+    RecordDecl *KmpTaskTQTyRD;
+  };
+  /// Emit task region for the task directive. The task region is emitted in
+  /// several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to field shareds of the resulting
+  /// structure kmp_task_t returned by the previous call (if any).
+  /// 3. Copy a pointer to destructions function to field destructions of the
+  /// resulting structure kmp_task_t.
+  /// \param D Current task directive.
+  /// \param Tied true if the task is tied (the task is tied to the thread that
+  /// can suspend its task region), false - untied (the task is not tied to any
+  /// thread).
+  /// \param Final Contains either constant bool value, or llvm::Value * of i1
+  /// type for final clause. If the value is true, the task forces all of its
+  /// child tasks to become final and included tasks.
+  /// \param NumberOfParts Number of parts in untied tasks.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \p
+  /// TaskFunction.
+  /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+  /// otherwise.
+  /// \param PrivateVars List of references to private variables for the task
+  /// directive.
+  /// \param PrivateCopies List of private copies for each private variable in
+  /// \p PrivateVars.
+  /// \param FirstprivateVars List of references to private variables for the
+  /// task directive.
+  /// \param FirstprivateCopies List of private copies for each private variable
+  /// in \p FirstprivateVars.
+  /// \param FirstprivateInits List of references to auto generated variables
+  /// used for initialization of a single array element. Used if firstprivate
+  /// variable is of array type.
+  TaskDataTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
+                          const OMPExecutableDirective &D, bool Tied,
+                          llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+                          unsigned NumberOfParts, llvm::Value *TaskFunction,
+                          QualType SharedsTy, Address Shareds,
+                          ArrayRef<const Expr *> PrivateVars,
+                          ArrayRef<const Expr *> PrivateCopies,
+                          ArrayRef<const Expr *> FirstprivateVars,
+                          ArrayRef<const Expr *> FirstprivateCopies,
+                          ArrayRef<const Expr *> FirstprivateInits);
+
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
@@ -774,6 +834,62 @@ public:
       ArrayRef<const Expr *> FirstprivateInits,
       ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences);
 
+  /// Emit task region for the taskloop directive. The taskloop region is
+  /// emitted in several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to field shareds of the resulting
+  /// structure kmp_task_t returned by the previous call (if any).
+  /// 3. Copy a pointer to destructions function to field destructions of the
+  /// resulting structure kmp_task_t.
+  /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
+  /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
+  /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task
+  /// is a resulting structure from
+  /// previous items.
+  /// \param D Current task directive.
+  /// \param Tied true if the task is tied (the task is tied to the thread that
+  /// can suspend its task region), false - untied (the task is not tied to any
+  /// thread).
+  /// \param Final Contains either constant bool value, or llvm::Value * of i1
+  /// type for final clause. If the value is true, the task forces all of its
+  /// child tasks to become final and included tasks.
+  /// \param Nogroup true if nogroup clause was specified, false otherwise.
+  /// \param NumberOfParts Number of parts in untied taskloops.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \p
+  /// TaskFunction.
+  /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+  /// otherwise.
+  /// \param PrivateVars List of references to private variables for the task
+  /// directive.
+  /// \param PrivateCopies List of private copies for each private variable in
+  /// \p PrivateVars.
+  /// \param FirstprivateVars List of references to private variables for the
+  /// task directive.
+  /// \param FirstprivateCopies List of private copies for each private variable
+  /// in \p FirstprivateVars.
+  /// \param FirstprivateInits List of references to auto generated variables
+  /// used for initialization of a single array element. Used if firstprivate
+  /// variable is of array type.
+  virtual void emitTaskLoopCall(
+      CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+      bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+      bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction,
+      QualType SharedsTy, Address Shareds, const Expr *IfCond,
+      ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies,
+      ArrayRef<const Expr *> FirstprivateVars,
+      ArrayRef<const Expr *> FirstprivateCopies,
+      ArrayRef<const Expr *> FirstprivateInits);
+
   /// \brief Emit code for the directive that does not require outlining.
   ///
   /// \param InnermostKind Kind of innermost directive (for simple directives it

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=267395&r1=267394&r2=267395&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Apr 25 07:22:29 2016
@@ -1630,8 +1630,8 @@ void CodeGenFunction::EmitOMPOuterLoop(b
     // IV < UB
     BoolCondVal = EvaluateExprAsBool(S.getCond());
   } else {
-    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
-                                    IL, LB, UB, ST);
+    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
+                                 LB, UB, ST);
   }
 
   // If there are any cleanups between here and the loop-exit scope,
@@ -2280,10 +2280,12 @@ void CodeGenFunction::EmitOMPParallelSec
   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
 }
 
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+                                                const RegionCodeGenTy &BodyGen,
+                                                const TaskGenTy &TaskGen,
+                                                bool Tied) {
   // Emit outlined function for task construct.
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
-  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   auto *I = CS->getCapturedDecl()->param_begin();
   auto *PartId = std::next(I);
   auto *TaskT = std::next(I, 4);
@@ -2291,52 +2293,44 @@ void CodeGenFunction::EmitOMPTaskDirecti
   // part id (0 for tied tasks, >=0 for untied task).
   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
   // Get list of private variables.
-  llvm::SmallVector<const Expr *, 8> PrivateVars;
-  llvm::SmallVector<const Expr *, 8> PrivateCopies;
+  OMPPrivateDataTy Data;
+  Data.Tied = Tied;
   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
     auto IRef = C->varlist_begin();
     for (auto *IInit : C->private_copies()) {
       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
-        PrivateVars.push_back(*IRef);
-        PrivateCopies.push_back(IInit);
+        Data.PrivateVars.push_back(*IRef);
+        Data.PrivateCopies.push_back(IInit);
       }
       ++IRef;
     }
   }
   EmittedAsPrivate.clear();
   // Get list of firstprivate variables.
-  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
-  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
-  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
     auto IRef = C->varlist_begin();
     auto IElemInitRef = C->inits().begin();
     for (auto *IInit : C->private_copies()) {
       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
-        FirstprivateVars.push_back(*IRef);
-        FirstprivateCopies.push_back(IInit);
-        FirstprivateInits.push_back(*IElemInitRef);
+        Data.FirstprivateVars.push_back(*IRef);
+        Data.FirstprivateCopies.push_back(IInit);
+        Data.FirstprivateInits.push_back(*IElemInitRef);
       }
       ++IRef;
       ++IElemInitRef;
     }
   }
   // Build list of dependences.
-  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
-      Dependences;
-  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
-    for (auto *IRef : C->varlists()) {
-      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
-    }
-  }
-  auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars](
-      CodeGenFunction &CGF, PrePostActionTy &Action) {
-    OMPPrivateScope Scope(CGF);
+  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
+    for (auto *IRef : C->varlists())
+      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen](CodeGenFunction &CGF,
+                                                     PrePostActionTy &Action) {
     // Set proper addresses for generated private copies.
-    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
-    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+    OMPPrivateScope Scope(CGF);
+    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty()) {
       auto *CopyFn = CGF.Builder.CreateLoad(
           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
       auto *PrivatesPtr = CGF.Builder.CreateLoad(
@@ -2345,14 +2339,14 @@ void CodeGenFunction::EmitOMPTaskDirecti
       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
       llvm::SmallVector<llvm::Value *, 16> CallArgs;
       CallArgs.push_back(PrivatesPtr);
-      for (auto *E : PrivateVars) {
+      for (auto *E : Data.PrivateVars) {
         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
         Address PrivatePtr = CGF.CreateMemTemp(
             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
         CallArgs.push_back(PrivatePtr.getPointer());
       }
-      for (auto *E : FirstprivateVars) {
+      for (auto *E : Data.FirstprivateVars) {
         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
         Address PrivatePtr =
             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
@@ -2370,13 +2364,21 @@ void CodeGenFunction::EmitOMPTaskDirecti
     (void)Scope.Privatize();
 
     Action.Enter(CGF);
-    CGF.EmitStmt(CS->getCapturedStmt());
+    BodyGen(CGF);
   };
+  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
+      Data.NumberOfParts);
+  OMPLexicalScope Scope(*this, S);
+  TaskGen(*this, OutlinedFn, Data);
+}
+
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+  // Emit outlined function for task construct.
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   // Check if we should emit tied or untied task.
   bool Tied = !S.getSingleClause<OMPUntiedClause>();
-  unsigned NumberOfParts;
-  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
-      S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts);
   // Check if the task is final
   llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
@@ -2401,11 +2403,20 @@ void CodeGenFunction::EmitOMPTaskDirecti
       break;
     }
   }
-  OMPLexicalScope Scope(*this, S);
-  CGM.getOpenMPRuntime().emitTaskCall(
-      *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn,
-      SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies,
-      FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences);
+
+  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitStmt(CS->getCapturedStmt());
+  };
+  auto &&TaskGen = [&S, &Final, SharedsTy, CapturedStruct,
+                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+                            const OMPPrivateDataTy &Data) {
+    CGF.CGM.getOpenMPRuntime().emitTaskCall(
+        CGF, S.getLocStart(), S, Data.Tied, Final, Data.NumberOfParts,
+        OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data.PrivateVars,
+        Data.PrivateCopies, Data.FirstprivateVars, Data.FirstprivateCopies,
+        Data.FirstprivateInits, Data.Dependences);
+  };
+  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Tied);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -3230,15 +3241,136 @@ void CodeGenFunction::EmitOMPTargetParal
   // TODO: codegen for target parallel for.
 }
 
+/// Emit a helper variable and return corresponding lvalue.
+static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
+                     const ImplicitParamDecl *PVD,
+                     CodeGenFunction::OMPPrivateScope &Privates) {
+  auto *VDecl = cast<VarDecl>(Helper->getDecl());
+  Privates.addPrivate(
+      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
+}
+
+void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
+  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
+  // Emit outlined function for task construct.
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+  const Expr *IfCond = nullptr;
+  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+    if (C->getNameModifier() == OMPD_unknown ||
+        C->getNameModifier() == OMPD_taskloop) {
+      IfCond = C->getCondition();
+      break;
+    }
+  }
+  bool Nogroup = S.getSingleClause<OMPNogroupClause>();
+  // TODO: Check if we should emit tied or untied task.
+  // Check if the task is final
+  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
+    // If the condition constant folds and can be elided, try to avoid emitting
+    // the condition and the dead arm of the if/else.
+    auto *Cond = Clause->getCondition();
+    bool CondConstant;
+    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+      Final.setInt(CondConstant);
+    else
+      Final.setPointer(EvaluateExprAsBool(Cond));
+  } else {
+    // By default the task is not final.
+    Final.setInt(/*IntVal=*/false);
+  }
+
+  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
+    // if (PreCond) {
+    //   for (IV in 0..LastIteration) BODY;
+    //   <Final counter/linear vars updates>;
+    // }
+    //
+
+    // Emit: if (PreCond) - begin.
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    OMPLoopScope PreInitScope(CGF, S);
+    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return;
+    } else {
+      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
+      ContBlock = CGF.createBasicBlock("taskloop.if.end");
+      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
+                  CGF.getProfileCount(&S));
+      CGF.EmitBlock(ThenBlock);
+      CGF.incrementProfileCounter(&S);
+    }
+
+    OMPPrivateScope LoopScope(CGF);
+    // Emit helper vars inits.
+    enum { LowerBound = 5, UpperBound, Stride, LastIter };
+    auto *I = CS->getCapturedDecl()->param_begin();
+    auto *LBP = std::next(I, LowerBound);
+    auto *UBP = std::next(I, UpperBound);
+    auto *STP = std::next(I, Stride);
+    auto *LIP = std::next(I, LastIter);
+    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
+             LoopScope);
+    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
+             LoopScope);
+    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
+    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
+             LoopScope);
+    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+    (void)LoopScope.Privatize();
+    // Emit the loop iteration variable.
+    const Expr *IVExpr = S.getIterationVariable();
+    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+    CGF.EmitVarDecl(*IVDecl);
+    CGF.EmitIgnoredExpr(S.getInit());
+
+    // Emit the iterations count variable.
+    // If it is not a variable, Sema decided to calculate iterations count on
+    // each iteration (e.g., it is foldable into a constant).
+    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+      // Emit calculation of the iterations count.
+      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
+    }
+
+    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+                         S.getInc(),
+                         [&S](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S, JumpDest());
+                           CGF.EmitStopPoint(&S);
+                         },
+                         [](CodeGenFunction &) {});
+    // Emit: if (PreCond) - end.
+    if (ContBlock) {
+      CGF.EmitBranch(ContBlock);
+      CGF.EmitBlock(ContBlock, true);
+    }
+  };
+  auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final,
+                    Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+                             const OMPPrivateDataTy &Data) {
+    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
+      OMPLoopScope PreInitScope(CGF, S);
+      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
+          CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup,
+          Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
+          Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars,
+          Data.FirstprivateCopies, Data.FirstprivateInits);
+    };
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
+                                                    CodeGen);
+  };
+  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, /*Tied=*/true);
+}
+
 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
-  // emit the code inside the construct for now
-  OMPLexicalScope Scope(*this, S);
-  CGM.getOpenMPRuntime().emitInlinedDirective(
-      *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-        OMPLoopScope PreInitScope(CGF, S);
-        CGF.EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-      });
+  EmitOMPTaskLoopBasedDirective(S);
 }
 
 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=267395&r1=267394&r2=267395&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Apr 25 07:22:29 2016
@@ -85,6 +85,7 @@ class BlockByrefHelpers;
 class BlockByrefInfo;
 class BlockFlags;
 class BlockFieldFlags;
+class RegionCodeGenTy;
 class TargetCodeGenInfo;
 
 /// The kind of evaluation to perform on values of a particular
@@ -2340,6 +2341,24 @@ public:
   /// \param D Directive (possibly) with the 'linear' clause.
   void EmitOMPLinearClauseInit(const OMPLoopDirective &D);
 
+  struct OMPPrivateDataTy {
+    bool Tied;
+    unsigned NumberOfParts;
+    SmallVector<const Expr *, 4> PrivateVars;
+    SmallVector<const Expr *, 4> PrivateCopies;
+    SmallVector<const Expr *, 4> FirstprivateVars;
+    SmallVector<const Expr *, 4> FirstprivateCopies;
+    SmallVector<const Expr *, 4> FirstprivateInits;
+    SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
+  };
+  typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
+                                        llvm::Value * /*OutlinedFn*/,
+                                        const OMPPrivateDataTy & /*Data*/)>
+      TaskGenTy;
+  void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+                                 const RegionCodeGenTy &BodyGen,
+                                 const TaskGenTy &TaskGen, bool Tied);
+
   void EmitOMPParallelDirective(const OMPParallelDirective &S);
   void EmitOMPSimdDirective(const OMPSimdDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
@@ -2371,6 +2390,7 @@ public:
   void
   EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S);
   void EmitOMPCancelDirective(const OMPCancelDirective &S);
+  void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S);
   void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
   void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=267395&r1=267394&r2=267395&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Apr 25 07:22:29 2016
@@ -1675,11 +1675,37 @@ void Sema::ActOnOpenMPRegionStart(OpenMP
     break;
   }
   case OMPD_taskloop: {
+    QualType KmpInt32Ty =
+        Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+    QualType KmpUInt64Ty =
+        Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+    QualType KmpInt64Ty =
+        Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
     Sema::CapturedParamNameType Params[] = {
+        std::make_pair(".global_tid.", KmpInt32Ty),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.",
+                       Context.VoidPtrTy.withConst().withRestrict()),
+        std::make_pair(
+            ".copy_fn.",
+            Context.getPointerType(CopyFnType).withConst().withRestrict()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".lb.", KmpUInt64Ty),
+        std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty),
+        std::make_pair(".liter.", KmpInt32Ty),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              Params);
+    // Mark this captured region as inlined, because we don't use outlined
+    // function directly.
+    getCurCapturedRegion()->TheCapturedDecl->addAttr(
+        AlwaysInlineAttr::CreateImplicit(
+            Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
     break;
   }
   case OMPD_taskloop_simd: {
@@ -4614,6 +4640,15 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKin
            LastIteration32.get()->getType()->hasSignedIntegerRepresentation(),
            LastIteration64.get(), SemaRef)))
     LastIteration = LastIteration32;
+  QualType VType = LastIteration.get()->getType();
+  QualType RealVType = VType;
+  QualType StrideVType = VType;
+  if (isOpenMPTaskLoopDirective(DKind)) {
+    VType =
+        SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+    StrideVType =
+        SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+  }
 
   if (!LastIteration.isUsable())
     return 0;
@@ -4649,7 +4684,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKin
 
   SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
 
-  QualType VType = LastIteration.get()->getType();
   // Build variables passed into runtime, nesessary for worksharing directives.
   ExprResult LB, UB, IL, ST, EUB;
   if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
@@ -4678,8 +4712,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKin
         /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
 
     // Stride variable returned by runtime (we initialize it to 1 by default).
-    VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
-    ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc);
+    VarDecl *STDecl =
+        buildVarDecl(SemaRef, InitLoc, StrideVType, ".omp.stride");
+    ST = buildDeclRefExpr(SemaRef, STDecl, StrideVType, InitLoc);
     SemaRef.AddInitializerToDecl(
         STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
         /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
@@ -4699,8 +4734,8 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKin
   ExprResult IV;
   ExprResult Init;
   {
-    VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
-    IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc);
+    VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv");
+    IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc);
     Expr *RHS = (isOpenMPWorksharingDirective(DKind) ||
                  isOpenMPTaskLoopDirective(DKind) ||
                  isOpenMPDistributeDirective(DKind))

Added: cfe/trunk/test/OpenMP/taskloop_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_codegen.cpp?rev=267395&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/taskloop_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/taskloop_codegen.cpp Mon Apr 25 07:22:29 2016
@@ -0,0 +1,198 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+// expected-no-diagnostics
+// REQUIRES: x86-registered-target
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+#pragma omp taskloop
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
+#pragma omp taskloop nogroup
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
+// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+  int i;
+#pragma omp taskloop if(argc) shared(argc, argv) collapse(2)
+  for (i = 0; i < argc; ++i)
+  for (int j = argc; j < argv[argc][argc]; ++j)
+    ;
+}
+
+// CHECK: define internal i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: ret i32 0
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S {
+  int a;
+  S(int c) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+#pragma omp taskloop shared(c)
+    for (a = 0; a < c; ++a)
+      ;
+  }
+} s(1);
+
+// CHECK: define internal i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#endif




More information about the cfe-commits mailing list