[Mlir-commits] [mlir] [mlir][gpu] Separate the barrier elimination code from transform ops (PR #71762)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Wed Nov 8 19:13:54 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir

Author: None (spaceotter)

<details>
<summary>Changes</summary>

Allows the barrier elimination code to be run from C++ as well.

---

Patch is 50.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71762.diff


7 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/Transforms/Passes.h (+4) 
- (modified) mlir/include/mlir/Dialect/GPU/Transforms/Passes.td (+14) 
- (modified) mlir/include/mlir/Dialect/GPU/Transforms/Utils.h (+1) 
- (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (+1) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+2-569) 
- (added) mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp (+614) 
- (modified) mlir/test/Dialect/GPU/barrier-elimination.mlir (+1) 


``````````diff
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 2a891a7d24f809a..992204673395cb4 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -165,6 +165,10 @@ void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);
 /// Pass decomposes memref ops inside `gpu.launch` body.
 std::unique_ptr<Pass> createGpuDecomposeMemrefsPass();
 
+void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns);
+
+std::unique_ptr<OperationPass<func::FuncOp>> createGpuEliminateBarriersPass();
+
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION
 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index 059893127295bf5..7326a9c2a0bf68f 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -37,6 +37,20 @@ def GpuMapParallelLoopsPass
   let dependentDialects = ["mlir::gpu::GPUDialect"];
 }
 
+def GpuEliminateBarriers
+    : Pass<"gpu-eliminate-barriers", "mlir::func::FuncOp"> {
+  let summary = "Erase unecessary barriers";
+  let constructor = "mlir::createGpuEliminateBarriersPass()";
+  let description = [{
+    Erase barriers that are unneeded because there are no dependent side effects
+    across the barrier.
+  }];
+  let dependentDialects = [
+    "mlir::gpu::GPUDialect", "mlir::scf::SCFDialect",
+    "mlir::memref::MemRefDialect"
+  ];
+}
+
 def GpuDecomposeMemrefsPass : Pass<"gpu-decompose-memrefs"> {
   let summary = "Decomposes memref index computation into explicit ops.";
   let description = [{
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
index a426bee7686dbc0..8ddff0817667d65 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
@@ -14,6 +14,7 @@
 #define MLIR_DIALECT_GPU_TRANSFORMS_UTILS_H_
 
 #include "mlir/Support/LLVM.h"
+#include "mlir/IR/PatternMatch.h"
 
 #include <string>
 
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 09a3cd06788bc5e..e26b5dacc98c365 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -62,6 +62,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/ShuffleRewriter.cpp
   Transforms/SPIRVAttachTarget.cpp
   Transforms/ROCDLAttachTarget.cpp
+  Transforms/EliminateBarriers.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index d4908fa7e89e736..0d5e9ae9bbd3792 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -263,575 +263,8 @@ void transform::ApplyUnrollVectorsSubgroupMmaOp::populatePatterns(
 // EliminateBarriersOp
 //===----------------------------------------------------------------------===//
 
-// The functions below provide interface-like verification, but are too specific
-// to barrier elimination to become interfaces.
-
-/// Implement the MemoryEffectsOpInterface in the suitable way.
-static bool isKnownNoEffectsOpWithoutInterface(Operation *op) {
-  // memref::AssumeAlignment is conceptually pure, but marking it as such would
-  // make DCE immediately remove it.
-  return isa<memref::AssumeAlignmentOp>(op);
-}
-
-/// Returns `true` if the op defines the parallel region that is subject to
-/// barrier synchronization.
-static bool isParallelRegionBoundary(Operation *op) {
-  if (op->hasAttr("__parallel_region_boundary_for_test"))
-    return true;
-
-  return isa<GPUFuncOp, LaunchOp>(op);
-}
-
-/// Returns `true` if the op behaves like a sequential loop, e.g., the control
-/// flow "wraps around" from the end of the body region back to its start.
-static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(op); }
-
-/// Returns `true` if the regions of the op are guaranteed to be executed at
-/// most once. Thus, if an operation in one of the nested regions of `op` is
-/// executed, then so are all the other operations in this region.
-static bool hasSingleExecutionBody(Operation *op) {
-  return isa<scf::IfOp, memref::AllocaScopeOp>(op);
-}
-
-/// Returns `true` if the operation is known to produce a pointer-like object
-/// distinct from any other object produced by a similar operation. For example,
-/// an allocation produces such an object.
-static bool producesDistinctBase(Operation *op) {
-  return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
-}
-
-/// Populates `effects` with all memory effects without associating them to a
-/// specific value.
-static void addAllValuelessEffects(
-    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
-  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Read>());
-  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Write>());
-  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Allocate>());
-  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Free>());
-}
-
-/// Collect the memory effects of the given op in 'effects'. Returns 'true' if
-/// it could extract the effect information from the op, otherwise returns
-/// 'false' and conservatively populates the list with all possible effects
-/// associated with no particular value or symbol.
-static bool
-collectEffects(Operation *op,
-               SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
-               bool ignoreBarriers = true) {
-  // Skip over barriers to avoid infinite recursion (those barriers would ask
-  // this barrier again).
-  if (ignoreBarriers && isa<BarrierOp>(op))
-    return true;
-
-  // Skip over ops that we know have no effects.
-  if (isKnownNoEffectsOpWithoutInterface(op))
-    return true;
-
-  // Collect effect instances of the operation. Note that the implementation of
-  // getEffects erases all effect instances that have the type other than the
-  // template parameter so we collect them first in a local buffer and then
-  // copy.
-  if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
-    SmallVector<MemoryEffects::EffectInstance> localEffects;
-    iface.getEffects(localEffects);
-    llvm::append_range(effects, localEffects);
-    return true;
-  }
-  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
-    for (auto &region : op->getRegions()) {
-      for (auto &block : region) {
-        for (auto &innerOp : block)
-          if (!collectEffects(&innerOp, effects, ignoreBarriers))
-            return false;
-      }
-    }
-    return true;
-  }
-
-  // We need to be conservative here in case the op doesn't have the interface
-  // and assume it can have any possible effect.
-  addAllValuelessEffects(effects);
-  return false;
-}
-
-/// Collects memory effects from operations that may be executed before `op` in
-/// a trivial structured control flow, e.g., without branches. Stops at the
-/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
-/// set. Returns `true` if the memory effects added to `effects` are exact,
-/// `false` if they are a conservative over-approximation. The latter means that
-/// `effects` contain instances not associated with a specific value.
-static bool
-getEffectsBefore(Operation *op,
-                 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
-                 bool stopAtBarrier) {
-  if (!op->getBlock())
-    return true;
-
-  // If there is a non-structured control flow, bail.
-  Region *region = op->getBlock()->getParent();
-  if (region && !llvm::hasSingleElement(region->getBlocks())) {
-    addAllValuelessEffects(effects);
-    return false;
-  }
-
-  // Collect all effects before the op.
-  if (op != &op->getBlock()->front()) {
-    for (Operation *it = op->getPrevNode(); it != nullptr;
-         it = it->getPrevNode()) {
-      if (isa<BarrierOp>(it)) {
-        if (stopAtBarrier)
-          return true;
-        else
-          continue;
-      }
-      if (!collectEffects(it, effects))
-        return false;
-    }
-  }
-
-  // Stop if reached the parallel region boundary.
-  if (isParallelRegionBoundary(op->getParentOp()))
-    return true;
-
-  // Otherwise, keep collecting above the parent operation.
-  if (!getEffectsBefore(op->getParentOp(), effects, stopAtBarrier))
-    return false;
-
-  // If the op is loop-like, collect effects from the trailing operations until
-  // we hit a barrier because they can be executed before the current operation
-  // by the previous iteration of this loop. For example, in the following loop
-  //
-  //   for i = ... {
-  //     op1
-  //     ...
-  //     barrier
-  //     op2
-  //   }
-  //
-  // the operation `op2` at iteration `i` is known to be executed before the
-  // operation `op1` at iteration `i+1` and the side effects must be ordered
-  // appropriately.
-  if (isSequentialLoopLike(op->getParentOp())) {
-    // Assuming loop terminators have no side effects.
-    return getEffectsBefore(op->getBlock()->getTerminator(), effects,
-                            /*stopAtBarrier=*/true);
-  }
-
-  // If the parent operation is not guaranteed to execute its (single-block)
-  // region once, walk the block.
-  bool conservative = false;
-  if (!hasSingleExecutionBody(op->getParentOp()))
-    op->getParentOp()->walk([&](Operation *in) {
-      if (conservative)
-        return WalkResult::interrupt();
-      if (!collectEffects(in, effects)) {
-        conservative = true;
-        return WalkResult::interrupt();
-      }
-      return WalkResult::advance();
-    });
-
-  return !conservative;
-}
-
-/// Collects memory effects from operations that may be executed after `op` in
-/// a trivial structured control flow, e.g., without branches. Stops at the
-/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
-/// set. Returns `true` if the memory effects added to `effects` are exact,
-/// `false` if they are a conservative over-approximation. The latter means that
-/// `effects` contain instances not associated with a specific value.
-static bool
-getEffectsAfter(Operation *op,
-                SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
-                bool stopAtBarrier) {
-  if (!op->getBlock())
-    return true;
-
-  // If there is a non-structured control flow, bail.
-  Region *region = op->getBlock()->getParent();
-  if (region && !llvm::hasSingleElement(region->getBlocks())) {
-    addAllValuelessEffects(effects);
-    return false;
-  }
-
-  // Collect all effects after the op.
-  if (op != &op->getBlock()->back())
-    for (Operation *it = op->getNextNode(); it != nullptr;
-         it = it->getNextNode()) {
-      if (isa<BarrierOp>(it)) {
-        if (stopAtBarrier)
-          return true;
-        continue;
-      }
-      if (!collectEffects(it, effects))
-        return false;
-    }
-
-  // Stop if reached the parallel region boundary.
-  if (isParallelRegionBoundary(op->getParentOp()))
-    return true;
-
-  // Otherwise, keep collecting below the parent operation.
-  if (!getEffectsAfter(op->getParentOp(), effects, stopAtBarrier))
-    return false;
-
-  // If the op is loop-like, collect effects from the leading operations until
-  // we hit a barrier because they can be executed after the current operation
-  // by the next iteration of this loop. For example, in the following loop
-  //
-  //   for i = ... {
-  //     op1
-  //     ...
-  //     barrier
-  //     op2
-  //   }
-  //
-  // the operation `op1` at iteration `i` is known to be executed after the
-  // operation `op2` at iteration `i-1` and the side effects must be ordered
-  // appropriately.
-  if (isSequentialLoopLike(op->getParentOp())) {
-    if (isa<BarrierOp>(op->getBlock()->front()))
-      return true;
-
-    bool exact = collectEffects(&op->getBlock()->front(), effects);
-    return getEffectsAfter(&op->getBlock()->front(), effects,
-                           /*stopAtBarrier=*/true) &&
-           exact;
-  }
-
-  // If the parent operation is not guaranteed to execute its (single-block)
-  // region once, walk the block.
-  bool conservative = false;
-  if (!hasSingleExecutionBody(op->getParentOp()))
-    op->getParentOp()->walk([&](Operation *in) {
-      if (conservative)
-        return WalkResult::interrupt();
-      if (!collectEffects(in, effects)) {
-        conservative = true;
-        return WalkResult::interrupt();
-      }
-      return WalkResult::advance();
-    });
-
-  return !conservative;
-}
-
-/// Looks through known "view-like" ops to find the base memref.
-static Value getBase(Value v) {
-  while (true) {
-    Operation *definingOp = v.getDefiningOp();
-    if (!definingOp)
-      break;
-
-    bool shouldContinue =
-        TypeSwitch<Operation *, bool>(v.getDefiningOp())
-            .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
-                [&](auto op) {
-                  v = op.getSource();
-                  return true;
-                })
-            .Case<memref::TransposeOp>([&](auto op) {
-              v = op.getIn();
-              return true;
-            })
-            .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>([&](auto op) {
-              v = op.getSrc();
-              return true;
-            })
-            .Default([](Operation *) { return false; });
-    if (!shouldContinue)
-      break;
-  }
-  return v;
-}
-
-/// Returns `true` if the value is defined as a function argument.
-static bool isFunctionArgument(Value v) {
-  auto arg = dyn_cast<BlockArgument>(v);
-  return arg && isa<FunctionOpInterface>(arg.getOwner()->getParentOp());
-}
-
-/// Returns the operand that the operation "propagates" through it for capture
-/// purposes. That is, if the value produced by this operation is captured, then
-/// so is the returned value.
-static Value propagatesCapture(Operation *op) {
-  return llvm::TypeSwitch<Operation *, Value>(op)
-      .Case(
-          [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
-      .Case([](CastOpInterface castLike) { return castLike->getOperand(0); })
-      .Case([](memref::TransposeOp transpose) { return transpose.getIn(); })
-      .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>(
-          [](auto op) { return op.getSrc(); })
-      .Default([](Operation *) { return Value(); });
-}
-
-/// Returns `true` if the given operation is known to capture the given value,
-/// `false` if it is known not to capture the given value, `nullopt` if neither
-/// is known.
-static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
-  return llvm::TypeSwitch<Operation *, std::optional<bool>>(op)
-      // Store-like operations don't capture the destination, but do capture
-      // the value.
-      .Case<memref::StoreOp, vector::TransferWriteOp>(
-          [&](auto op) { return op.getValue() == v; })
-      .Case<vector::StoreOp, vector::MaskedStoreOp>(
-          [&](auto op) { return op.getValueToStore() == v; })
-      // These operations are known not to capture.
-      .Case([](memref::DeallocOp) { return false; })
-      // By default, we don't know anything.
-      .Default([](Operation *) { return std::nullopt; });
-}
-
-/// Returns `true` if the value may be captured by any of its users, i.e., if
-/// the user may be storing this value into memory. This makes aliasing analysis
-/// more conservative as it cannot assume the pointer-like value is only passed
-/// around through SSA use-def.
-static bool maybeCaptured(Value v) {
-  SmallVector<Value> todo = {v};
-  while (!todo.empty()) {
-    Value v = todo.pop_back_val();
-    for (Operation *user : v.getUsers()) {
-      // A user that is known to only read cannot capture.
-      auto iface = dyn_cast<MemoryEffectOpInterface>(user);
-      if (iface) {
-        SmallVector<MemoryEffects::EffectInstance> effects;
-        iface.getEffects(effects);
-        if (llvm::all_of(effects,
-                         [](const MemoryEffects::EffectInstance &effect) {
-                           return isa<MemoryEffects::Read>(effect.getEffect());
-                         })) {
-          continue;
-        }
-      }
-
-      // When an operation is known to create an alias, consider if the
-      // source is captured as well.
-      if (Value v = propagatesCapture(user)) {
-        todo.push_back(v);
-        continue;
-      }
-
-      std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(user, v);
-      if (!knownCaptureStatus || *knownCaptureStatus)
-        return true;
-    }
-  }
-
-  return false;
-}
-
-/// Returns true if two values may be referencing aliasing memory. This is a
-/// rather naive and conservative analysis. Values defined by different
-/// allocation-like operations as well as values derived from those by casts and
-/// views cannot alias each other. Similarly, values defined by allocations
-/// inside a function cannot alias function arguments. Global values cannot
-/// alias each other or local allocations. Values that are captured, i.e.
-/// themselves potentially stored in memory, are considered as aliasing with
-/// everything. This seems sufficient to achieve barrier removal in structured
-/// control flow, more complex cases would require a proper dataflow analysis.
-static bool mayAlias(Value first, Value second) {
-  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
-    DBGS_ALIAS() << "checking aliasing between ";
-    DBGS_ALIAS() << first << "\n";
-    DBGS_ALIAS() << "                      and ";
-    DBGS_ALIAS() << second << "\n";
-  });
-
-  first = getBase(first);
-  second = getBase(second);
-
-  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
-    DBGS_ALIAS() << "base ";
-    DBGS_ALIAS() << first << "\n";
-    DBGS_ALIAS() << " and ";
-    DBGS_ALIAS() << second << "\n";
-  });
-
-  // Values derived from the same base memref do alias (unless we do a more
-  // advanced analysis to prove non-overlapping accesses).
-  if (first == second) {
-    DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
-    return true;
-  }
-
-  // Different globals cannot alias.
-  if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
-    if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
-      return globFirst.getNameAttr() == globSecond.getNameAttr();
-    }
-  }
-
-  // Two function arguments marked as noalias do not alias.
-  auto isNoaliasFuncArgument = [](Value value) {
-    auto bbArg = dyn_cast<BlockArgument>(value);
-    if (!bbArg)
-      return false;
-    auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
-    if (!iface)
-      return false;
-    // TODO: we need a way to not depend on the LLVM dialect here.
-    return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
-  };
-  if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
-    return false;
-
-  bool isDistinct[] = {producesDistinctBase(first.getDefiningOp()),
-                       producesDistinctBase(second.getDefiningOp())};
-  bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
-                     second.getDefiningOp<memref::GetGlobalOp>() != nullptr};
-
-  // Non-equivalent distinct bases and globals cannot alias. At this point, we
-  // have already filtered out based on values being equal and global name being
-  // equal.
-  if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
-    return false;
-
-  bool isArg[] = {isFunctionArgument(first), isFunctionArgument(second)};
-
-  // Distinct bases (allocations) cannot have been passed as an argument.
-  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
-    return false;
-
-  // Non-captured base distinct values cannot conflict with another base value.
-  if (isDistinct[0] && !maybeCaptured(first))
-    return false;
-  if (isDistinct[1] && !maybeCaptured(second))
-    return false;
-
-  // Otherwise, conservatively assume aliasing.
-  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
-  return true;
-}
-
-/// Returns `true` if the effect may be affecting memory aliasing the value. If
-/// the effect is not associated with any value, it is assumed to affect all
-/// memory an...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/71762


More information about the Mlir-commits mailing list