[flang-commits] [flang] eaa0d28 - [Flang][MLIR][OpenMP] Update OMPEarlyOutlining to support Bounds, MapEntry and declare target globals

Andrew Gozillon via flang-commits flang-commits at lists.llvm.org
Tue Sep 19 06:29:17 PDT 2023


Author: Andrew Gozillon
Date: 2023-09-19T08:26:46-05:00
New Revision: eaa0d281b62cf0e1e538c24b69b2acde7b0fa665

URL: https://github.com/llvm/llvm-project/commit/eaa0d281b62cf0e1e538c24b69b2acde7b0fa665
DIFF: https://github.com/llvm/llvm-project/commit/eaa0d281b62cf0e1e538c24b69b2acde7b0fa665.diff

LOG: [Flang][MLIR][OpenMP] Update OMPEarlyOutlining to support Bounds, MapEntry and declare target globals

This patch is a required change for the device-side IR to
maintain appropriate links from declare target variables to
their global variables for later lowering.

It is also required to clone over map bounds and map entry
operations so that the correct information is preserved for
later lowering of the IR.

The pass simply clones over the relevant operations,
preserving the links they maintained prior to the pass,
rather than redirecting them to new function arguments,
which would lose information in the case of declare target
and map operations.
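
For illustration, the IR shape involved looks roughly like the sketch
below (hand-written and heavily simplified; the omp.map_info/omp.target
syntax is approximate and the symbol names are invented). The point is
that the fir.address_of of a declare target global and the omp.map_info
feeding the target op are cloned into the outlined function rather than
being rewritten into block arguments, so the link back to the global is
preserved:

    // Host IR before the pass, with @_QMmEsp marked declare target:
    func.func @_QQmain() {
      %0 = fir.address_of(@_QMmEsp) : !fir.ref<i32>
      %m = omp.map_info var_ptr(%0 : !fir.ref<i32>, i32) ... -> !fir.ref<i32>
      omp.target map_entries(%m : !fir.ref<i32>) {
        ...
        omp.terminator
      }
      return
    }

    // After the pass, the outlined function re-creates the address_of and
    // map_info chain instead of taking them as block arguments:
    func.func @_QQmain_omp_outline_0() {
      %0 = fir.address_of(@_QMmEsp) : !fir.ref<i32>
      %m = omp.map_info var_ptr(%0 : !fir.ref<i32>, i32) ... -> !fir.ref<i32>
      omp.target map_entries(%m : !fir.ref<i32>) {
        ...
        omp.terminator
      }
      return
    }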

Depends on D158734

reviewers: TIFitis, razvanlupusoru

Differential Revision: https://reviews.llvm.org/D158735

Added: 
    

Modified: 
    flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp

Removed: 
    


################################################################################
diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
index 6bbcbf84ac0b3fc..20ef66e0ad48cec 100644
--- a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
+++ b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
@@ -29,18 +29,112 @@ class OMPEarlyOutliningPass
     return std::string(parentName) + "_omp_outline_" + std::to_string(count);
   }
 
+  // Given a value, this function iterates over an operation's results
+  // and returns the index of the result that the value corresponds to.
+  // There may be a simpler way to do this, however.
+  unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
+    for (unsigned i = 0; i < op->getNumResults(); ++i) {
+      if (op->getResult(i) == value)
+        return i;
+    }
+    return 0;
+  }
+
+  bool isDeclareTargetOp(mlir::Operation *op) {
+    if (fir::AddrOfOp addressOfOp = mlir::dyn_cast<fir::AddrOfOp>(op))
+      if (fir::GlobalOp gOp = mlir::dyn_cast<fir::GlobalOp>(
+              addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
+                  addressOfOp.getSymbol())))
+        if (auto declareTargetGlobal =
+                llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
+                    gOp.getOperation()))
+          if (declareTargetGlobal.isDeclareTarget())
+            return true;
+    return false;
+  }
+
+  // Currently used for cloning arguments that are nested. Should be
+  // extendable where required, perhaps via operation
+  // specialisation/overloading, if something needs specialised handling.
+  // NOTE: This results in duplication of some values that would otherwise
+  // be a single SSA value shared between operations; this is tidied up to
+  // some extent during lowering.
+  mlir::Operation *
+  cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op,
+                      llvm::SetVector<mlir::Value> &inputs,
+                      mlir::Block::BlockArgListType &newInputs) {
+    mlir::IRMapping valueMap;
+    for (auto opValue : op->getOperands()) {
+      if (opValue.getDefiningOp()) {
+        auto resIdx = getResultIndex(opValue, opValue.getDefiningOp());
+        valueMap.map(opValue,
+                     cloneArgAndChildren(builder, opValue.getDefiningOp(),
+                                         inputs, newInputs)
+                         ->getResult(resIdx));
+      } else {
+        for (auto inArg : llvm::zip(inputs, newInputs)) {
+          if (opValue == std::get<0>(inArg))
+            valueMap.map(opValue, std::get<1>(inArg));
+        }
+      }
+    }
+
+    return builder.clone(*op, valueMap);
+  }
+
+  void cloneMapOpVariables(mlir::OpBuilder &builder, mlir::IRMapping &valueMap,
+                           mlir::IRMapping &mapInfoMap,
+                           llvm::SetVector<mlir::Value> &inputs,
+                           mlir::Block::BlockArgListType &newInputs,
+                           mlir::Value varPtr) {
+    if (fir::BoxAddrOp boxAddrOp =
+            mlir::dyn_cast_if_present<fir::BoxAddrOp>(varPtr.getDefiningOp())) {
+      mlir::Value newV =
+          cloneArgAndChildren(builder, boxAddrOp, inputs, newInputs)
+              ->getResult(0);
+      mapInfoMap.map(varPtr, newV);
+      valueMap.map(boxAddrOp, newV);
+      return;
+    }
+
+    if (varPtr.getDefiningOp() && isDeclareTargetOp(varPtr.getDefiningOp())) {
+      fir::AddrOfOp addrOp =
+          mlir::dyn_cast<fir::AddrOfOp>(varPtr.getDefiningOp());
+      mlir::Value newV = builder.clone(*addrOp)->getResult(0);
+      mapInfoMap.map(varPtr, newV);
+      valueMap.map(addrOp, newV);
+      return;
+    }
+
+    for (auto inArg : llvm::zip(inputs, newInputs)) {
+      if (varPtr == std::get<0>(inArg))
+        mapInfoMap.map(varPtr, std::get<1>(inArg));
+    }
+  }
+
   mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder,
                                      mlir::omp::TargetOp &targetOp,
                                      mlir::func::FuncOp &parentFunc,
                                      unsigned count) {
+    // NOTE: once implicit captures are handled appropriately in the initial
+    // PFT lowering, if that is possible, we can remove the usage of
+    // getUsedValuesDefinedAbove and instead just iterate over the target op's
+    // operands (or just the map arguments) and perhaps refactor this function
+    // a little.
     // Collect inputs
     llvm::SetVector<mlir::Value> inputs;
-    for (auto operand : targetOp.getOperation()->getOperands())
-      inputs.insert(operand);
-
     mlir::Region &targetRegion = targetOp.getRegion();
     mlir::getUsedValuesDefinedAbove(targetRegion, inputs);
 
+    // Filter out declare target and map entries, which are specially handled
+    // at the moment; we do not want these to end up as function arguments,
+    // which would just be more noise in the IR.
+    for (auto value : inputs)
+      if (value.getDefiningOp())
+        if (mlir::isa<mlir::omp::MapInfoOp>(value.getDefiningOp()) ||
+            isDeclareTargetOp(value.getDefiningOp()))
+          inputs.remove(value);
+
     // Create new function and initialize
     mlir::FunctionType funcType = builder.getFunctionType(
         mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange());
@@ -51,7 +145,7 @@ class OMPEarlyOutliningPass
         mlir::func::FuncOp::create(loc, funcName, funcType);
     mlir::Block *entryBlock = newFunc.addEntryBlock();
     builder.setInsertionPointToStart(entryBlock);
-    mlir::ValueRange newInputs = entryBlock->getArguments();
+    mlir::Block::BlockArgListType newInputs = entryBlock->getArguments();
 
     // Set the declare target information, the outlined function
     // is always a host function.
@@ -68,10 +162,47 @@ class OMPEarlyOutliningPass
                 newFunc.getOperation()))
       earlyOutlineOp.setParentName(parentName);
 
-    // Create input map from inputs to function parameters.
+    // The value map for the newly generated target operation; we must
+    // remap most of the inputs.
     mlir::IRMapping valueMap;
-    for (auto InArg : llvm::zip(inputs, newInputs))
-      valueMap.map(std::get<0>(InArg), std::get<1>(InArg));
+
+    // Special handling: declare target and regular map variables are
+    // handled slightly differently for the moment; declare target has
+    // its addressOfOp cloned over, whereas we skip it for the regular map
+    // variables. We need knowledge of which global is linked to the map
+    // operation for declare target, whereas we are not bothered about the
+    // regular map variables for the moment. We could treat both the same;
+    // however, cloning across the minimum for the moment, to avoid
+    // optimisations breaking segments of the lowering, seems prudent as this
+    // was the original intent of the pass.
+    for (auto oper : targetOp.getOperation()->getOperands()) {
+      if (auto mapEntry =
+              mlir::dyn_cast<mlir::omp::MapInfoOp>(oper.getDefiningOp())) {
+        mlir::IRMapping mapInfoMap;
+        for (auto bound : mapEntry.getBounds()) {
+          if (auto mapEntryBound = mlir::dyn_cast<mlir::omp::DataBoundsOp>(
+                  bound.getDefiningOp())) {
+            mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound,
+                                                      inputs, newInputs)
+                                      ->getResult(0));
+          }
+        }
+
+        cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
+                            mapEntry.getVarPtr());
+
+        if (mapEntry.getVarPtrPtr())
+          cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
+                              mapEntry.getVarPtrPtr());
+
+        valueMap.map(
+            mapEntry,
+            builder.clone(*mapEntry.getOperation(), mapInfoMap)->getResult(0));
+      }
+    }
+
+    for (auto inArg : llvm::zip(inputs, newInputs))
+      valueMap.map(std::get<0>(inArg), std::get<1>(inArg));
 
     // Clone the target op into the new function
     builder.clone(*(targetOp.getOperation()), valueMap);


        

