[Openmp-commits] [openmp] [Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering utilising omp.bounds (PR #68689)
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 26 04:08:36 PDT 2023
================
@@ -2001,61 +2100,204 @@ static bool targetOpSupported(Operation &opInst) {
}
static void
-handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
+handleDeclareTargetMapVar(MapInfoData &mapData,
LLVM::ModuleTranslation &moduleTranslation,
llvm::IRBuilderBase &builder) {
- for (const mlir::Value &mapOp : mapOperands) {
- auto mapInfoOp =
- mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
- llvm::Value *mapOpValue =
- moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
- if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
- moduleTranslation)) {
- // The user's iterator will get invalidated if we modify an element,
+ for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+ // In the case of declare target mapped variables, the basePointer is
+ // the reference pointer generated by the convertDeclareTargetAttr
+ // method. Whereas the kernelValue is the original variable, so for
+ // the device we must replace all uses of this original global variable
+ // (stored in kernelValue) with the reference pointer (stored in
+ // basePointer for declare target mapped variables), as for device the
+ // data is mapped into this reference pointer and should be loaded
+ // from it, the original variable is discarded. On host both exist and
+ // metadata is generated (elsewhere, in the convertDeclareTargetAttr
+ // function) to link the two variables in the runtime and then both the
+ // reference pointer and the pointer are assigned in the kernel argument
+ // structure for the host.
+ if (mapData.IsDeclareTarget[i]) {
+ // The users iterator will get invalidated if we modify an element,
// so we populate this vector of uses to alter each user on an individual
// basis to emit its own load (rather than one load for all).
llvm::SmallVector<llvm::User *> userVec;
- for (llvm::User *user : mapOpValue->users())
+ for (llvm::User *user : mapData.OriginalValue[i]->users())
userVec.push_back(user);
for (llvm::User *user : userVec) {
if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
- auto *load = builder.CreateLoad(
- moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
- declareTarget);
+ auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+ mapData.BasePointers[i]);
load->moveBefore(insn);
- user->replaceUsesOfWith(mapOpValue, load);
+ user->replaceUsesOfWith(mapData.OriginalValue[i], load);
}
}
}
}
}
+// This currently implements a very light version of Clang's
+// EmitParmDecl's handling of direct argument handling as well
+// as a portion of the argument access generation based on
+// capture types found at the end of emitOutlinedFunctionPrologue
+// in Clang. The indirect path handling of EmitParmDecl's may be
+// required for future work, but a direct 1-to-1 copy doesn't seem
+// possible as the logic is rather scattered throughout Clang's
+// lowering and perhaps we wish to deviate slightly.
static llvm::IRBuilderBase::InsertPoint
-createDeviceArgumentAccessor(llvm::Argument &arg, llvm::Value *input,
- llvm::Value *&retVal, llvm::IRBuilderBase &builder,
+createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
+ llvm::Value *input, llvm::Value *&retVal,
+ llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder &ompBuilder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::IRBuilderBase::InsertPoint allocaIP,
llvm::IRBuilderBase::InsertPoint codeGenIP) {
builder.restoreIP(allocaIP);
- llvm::Value *addr =
+ mlir::omp::VariableCaptureKind capture =
+ mlir::omp::VariableCaptureKind::ByRef;
+ llvm::Type *inputType = input->getType();
+
+ // Find the associated MapInfoData entry for the current input
+ for (size_t i = 0; i < mapData.MapClause.size(); ++i)
+ if (mapData.OriginalValue[i] == input) {
+ if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+ mapData.MapClause[i])) {
+ capture = mapOp.getMapCaptureType().value_or(
+ mlir::omp::VariableCaptureKind::ByRef);
+ }
+
+ inputType = mapData.BaseType[i];
+ break;
+ }
+
+ unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
+ unsigned int defaultAS =
+ ompBuilder.M.getDataLayout().getProgramAddressSpace();
+
+ // Create the alloca for the argument at the current point.
+ llvm::Value *v =
builder.CreateAlloca(arg.getType()->isPointerTy()
? arg.getType()
: llvm::Type::getInt64Ty(builder.getContext()),
ompBuilder.M.getDataLayout().getAllocaAddrSpace());
- llvm::Value *addrAscast =
- arg.getType()->isPointerTy()
- ? builder.CreatePointerBitCastOrAddrSpaceCast(addr, input->getType())
- : addr;
- builder.CreateStore(&arg, addrAscast);
+ if (allocaAS != defaultAS && arg.getType()->isPointerTy()) {
+ v = builder.CreatePointerBitCastOrAddrSpaceCast(
+ v, arg.getType()->getPointerTo(defaultAS));
+ }
+
+ builder.CreateStore(&arg, v);
+
builder.restoreIP(codeGenIP);
- retVal = builder.CreateLoad(arg.getType(), addrAscast);
+
+ switch (capture) {
+ case mlir::omp::VariableCaptureKind::ByCopy: {
+ if (inputType->isPointerTy()) {
+ retVal = v;
+ return builder.saveIP();
+ }
+
+ // Ignore conversions like int -> uint.
+ if (v->getType() == inputType->getPointerTo()) {
+ retVal = v;
+ return builder.saveIP();
+ }
+
+ assert(false && "Currently unsupported OMPTargetVarCaptureByCopy Type");
+ break;
+ }
+ case mlir::omp::VariableCaptureKind::ByRef: {
+ retVal = builder.CreateAlignedLoad(
+ v->getType(), v,
+ ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+ break;
+ }
+ case mlir::omp::VariableCaptureKind::This:
+ case mlir::omp::VariableCaptureKind::VLAType:
+ assert(false && "Currently unsupported capture kind");
+ break;
+ }
+
return builder.saveIP();
}
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder) {
+ for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+ // if it's declare target, skip it, it's handled separately.
+ if (!mapData.IsDeclareTarget[i]) {
+ mlir::omp::VariableCaptureKind captureKind =
+ mlir::omp::VariableCaptureKind::ByRef;
+
+ if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+ mapData.MapClause[i])) {
+ captureKind = mapOp.getMapCaptureType().value_or(
+ mlir::omp::VariableCaptureKind::ByRef);
+ }
+
+ switch (captureKind) {
+ case mlir::omp::VariableCaptureKind::ByRef: {
+ // Currently handles array sectioning lowerbound case, but more
+ // logic may be required in the future. Clang invokes EmitLValue,
+ // which has specialised logic for special Clang types such as user-defined
+ // types, so it is possible we will have to extend this for
+ // structures or other complex types. As the general idea is that this
+ // function mimics some of the logic from Clang that we require for
+ // kernel argument passing from host -> device.
+ if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+ mapData.MapClause[i])) {
+ if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
+
+ std::vector<llvm::Value *> idx =
+ std::vector<llvm::Value *>{builder.getInt64(0)};
+ for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
+ if (auto boundOp =
+ mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+ mapOp.getBounds()[i].getDefiningOp())) {
+ idx.push_back(
+ moduleTranslation.lookupValue(boundOp.getLowerBound()));
+ }
+ }
+
+ mapData.Pointers[i] = builder.CreateInBoundsGEP(
+ mapData.BaseType[i], mapData.Pointers[i], idx);
+ }
+ }
+ } break;
+ case mlir::omp::VariableCaptureKind::ByCopy: {
----------------
agozillon wrote:
Not upstream currently; downstream, yes. Unfortunately, it isn't testable until @TIFitis's IsolatedFromAbove patch series lands, which also incorporates implicit argument capture, which for scalars defaults to ByCopy. At the moment, explicit captures all default to ByRef.
I could perhaps write an MLIR -> LLVM-IR test showcasing the transformation though, as opposed to a Fortran OpenMP -> executable test, if that sounds reasonable in the interim?
https://github.com/llvm/llvm-project/pull/68689
More information about the Openmp-commits
mailing list