[PATCH] D17019: [OpenMP] Code generation for teams - kernel launching

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 16 20:27:46 PST 2016


ABataev added inline comments.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3811-3818
@@ -3789,1 +3810,10 @@
       DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams and thread limit to pass to the runtime function call
+  // later on. This is done through a function that returns the value. This is
+  // required because the expression is captured in the enclosing target
+  // environment when the teams directive is not combined with target. This only
+  // has to be done for the host.
+  //
+  // FIXME: Accommodate other combined directives with teams when they become
----------------
It would be better to use OMPCapturedExprDecl for this, just as is done for the schedule clause.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3822-3857
@@ -3790,1 +3821,38 @@
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+        auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+          auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+          CGF.Builder.CreateRet(
+              CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+          CGF.EmitBlock(CGF.createBasicBlock());
+        };
+
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_num_teams");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        NestedNumTeamsFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+        auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+          auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+          CGF.Builder.CreateRet(
+              CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+          CGF.EmitBlock(CGF.createBasicBlock());
+        };
+
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_thread_limit");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        NestedThreadLimitFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+    }
   return;
----------------
Please do this in separate functions.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3837
@@ +3836,3 @@
+        NestedNumTeamsFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
----------------
I think the return type must be Int32Ty.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3854
@@ +3853,3 @@
+        NestedThreadLimitFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
----------------
This should also be Int32Ty.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:4148-4161
@@ +4147,16 @@
+      llvm::Value *ThreadLimit = nullptr;
+      if (TD->getSingleClause<OMPNumTeamsClause>()) {
+        assert(NestedNumTeamsFn && "Helper function is required to get the "
+                                   "number of teams of an enclosed teams "
+                                   "directive.");
+        NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers);
+      } else
+        NumTeams = CGF.Builder.getInt32(0);
+      if (TD->getSingleClause<OMPThreadLimitClause>()) {
+        assert(NestedThreadLimitFn && "Helper function is required to get the "
+                                      "thread limit of an enclosed teams "
+                                      "directive.");
+        ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, BasePointers);
+      } else
+        ThreadLimit = CGF.Builder.getInt32(0);
+
----------------
Again, all of this must be done in separate functions.


http://reviews.llvm.org/D17019





More information about the cfe-commits mailing list