[Openmp-commits] [openmp] [Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering utilising omp.bounds (PR #68689)

Thu Oct 26 04:12:28 PDT 2023

================
@@ -2001,61 +2100,204 @@ static bool targetOpSupported(Operation &opInst) {
 }
 
 static void
-handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
+handleDeclareTargetMapVar(MapInfoData &mapData,
                           LLVM::ModuleTranslation &moduleTranslation,
                           llvm::IRBuilderBase &builder) {
-  for (const mlir::Value &mapOp : mapOperands) {
-    auto mapInfoOp =
-        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-    if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
-                                                       moduleTranslation)) {
-      // The user's iterator will get invalidated if we modify an element,
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // In the case of declare target mapped variables, the basePointer is
+    // the reference pointer generated by the convertDeclareTargetAttr
+    // method. Whereas the kernelValue is the original variable, so for
+    // the device we must replace all uses of this original global variable
+    // (stored in kernelValue) with the reference pointer (stored in
+    // basePointer for declare target mapped variables), as for device the
+    // data is mapped into this reference pointer and should be loaded
+    // from it, the original variable is discarded. On host both exist and
+    // metadata is generated (elsewhere in the convertDeclareTargetAttr)
+    // function to link the two variables in the runtime and then both the
+    // reference pointer and the pointer are assigned in the kernel argument
+    // structure for the host.
+    if (mapData.IsDeclareTarget[i]) {
+      // The users iterator will get invalidated if we modify an element,
       // so we populate this vector of uses to alter each user on an individual
       // basis to emit its own load (rather than one load for all).
       llvm::SmallVector<llvm::User *> userVec;
-      for (llvm::User *user : mapOpValue->users())
+      for (llvm::User *user : mapData.OriginalValue[i]->users())
         userVec.push_back(user);
 
       for (llvm::User *user : userVec) {
         if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
-          auto *load = builder.CreateLoad(
-              moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
-              declareTarget);
+          auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+                                          mapData.BasePointers[i]);
           load->moveBefore(insn);
-          user->replaceUsesOfWith(mapOpValue, load);
+          user->replaceUsesOfWith(mapData.OriginalValue[i], load);
         }
       }
     }
   }
 }
 
+// This currently implements a very light version of Clang's
+// EmitParmDecl's handling of direct argument handling as well
+// as a portion of the argument access generation based on
+// capture types found at the end of emitOutlinedFunctionPrologue
+// in Clang. The indirect path handling of EmitParmDecl's may be
+// required for future work, but a direct 1-to-1 copy doesn't seem
+// possible as the logic is rather scattered throughout Clang's
+// lowering and perhaps we wish to deviate slightly.
 static llvm::IRBuilderBase::InsertPoint
-createDeviceArgumentAccessor(llvm::Argument &arg, llvm::Value *input,
-                             llvm::Value *&retVal, llvm::IRBuilderBase &builder,
+createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
+                             llvm::Value *input, llvm::Value *&retVal,
+                             llvm::IRBuilderBase &builder,
                              llvm::OpenMPIRBuilder &ompBuilder,
                              LLVM::ModuleTranslation &moduleTranslation,
                              llvm::IRBuilderBase::InsertPoint allocaIP,
                              llvm::IRBuilderBase::InsertPoint codeGenIP) {
   builder.restoreIP(allocaIP);
 
-  llvm::Value *addr =
+  mlir::omp::VariableCaptureKind capture =
+      mlir::omp::VariableCaptureKind::ByRef;
+  llvm::Type *inputType = input->getType();
+
+  // Find the associated MapInfoData entry for the current input
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
+    if (mapData.OriginalValue[i] == input) {
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        capture = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      inputType = mapData.BaseType[i];
+      break;
+    }
+
+  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
+  unsigned int defaultAS =
+      ompBuilder.M.getDataLayout().getProgramAddressSpace();
+
+  // Create the alloca for the argument the current point.
+  llvm::Value *v =
       builder.CreateAlloca(arg.getType()->isPointerTy()
                                ? arg.getType()
                                : llvm::Type::getInt64Ty(builder.getContext()),
                            ompBuilder.M.getDataLayout().getAllocaAddrSpace());
-  llvm::Value *addrAscast =
-      arg.getType()->isPointerTy()
-          ? builder.CreatePointerBitCastOrAddrSpaceCast(addr, input->getType())
-          : addr;
 
-  builder.CreateStore(&arg, addrAscast);
+  if (allocaAS != defaultAS && arg.getType()->isPointerTy()) {
+    v = builder.CreatePointerBitCastOrAddrSpaceCast(
+        v, arg.getType()->getPointerTo(defaultAS));
+  }
+
+  builder.CreateStore(&arg, v);
+
   builder.restoreIP(codeGenIP);
-  retVal = builder.CreateLoad(arg.getType(), addrAscast);
+
+  switch (capture) {
+  case mlir::omp::VariableCaptureKind::ByCopy: {
+    if (inputType->isPointerTy()) {
+      retVal = v;
+      return builder.saveIP();
+    }
+
+    // Ignore conversions like int -> uint.
+    if (v->getType() == inputType->getPointerTo()) {
+      retVal = v;
+      return builder.saveIP();
+    }
+
+    assert(false && "Currently unsupported OMPTargetVarCaptureByCopy Type");
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::ByRef: {
+    retVal = builder.CreateAlignedLoad(
+        v->getType(), v,
+        ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::This:
+  case mlir::omp::VariableCaptureKind::VLAType:
+    assert(false && "Currently unsupported capture kind");
+    break;
+  }
+
   return builder.saveIP();
 }
 
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // if it's declare target, skip it, it's handled seperately.
+    if (!mapData.IsDeclareTarget[i]) {
+      mlir::omp::VariableCaptureKind captureKind =
+          mlir::omp::VariableCaptureKind::ByRef;
+
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        captureKind = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      switch (captureKind) {
+      case mlir::omp::VariableCaptureKind::ByRef: {
+        // Currently handles array sectioning lowerbound case, but more
+        // logic may be required in the future. Clang invokes EmitLValue,
+        // which has specialised logic for special Clang types such as user
+        // defines, so it is possible we will have to extend this for
+        // structures or other complex types. As the general idea is that this
+        // function mimics some of the logic from Clang that we require for
+        // kernel argument passing from host -> device.
+        if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+                mapData.MapClause[i])) {
+          if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
+
+            std::vector<llvm::Value *> idx =
+                std::vector<llvm::Value *>{builder.getInt64(0)};
+            for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
+              if (auto boundOp =
+                      mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                          mapOp.getBounds()[i].getDefiningOp())) {
+                idx.push_back(
+                    moduleTranslation.lookupValue(boundOp.getLowerBound()));
+              }
+            }
+
+            mapData.Pointers[i] = builder.CreateInBoundsGEP(
+                mapData.BaseType[i], mapData.Pointers[i], idx);
+          }
+        }
+      } break;
+      case mlir::omp::VariableCaptureKind::ByCopy: {
----------------
kiranchandramohan wrote:

In this location, we only have `MLIR -> LLVM-IR ` tests, so Fortran OpenMP -> executable test is not needed. You could alternatively move this code out of this patch into a separate patch.

https://github.com/llvm/llvm-project/pull/68689