[Mlir-commits] [mlir] 981932b - [MLIR] Clarify (test-scf-)parallel-loop-collapsing

Wed Apr 5 04:41:27 PDT 2023

Author: Tres Popp
Date: 2023-04-05T13:41:15+02:00
New Revision: 981932bc57aeaa6d4dd02b011d848ed9fa4dd615

URL: https://github.com/llvm/llvm-project/commit/981932bc57aeaa6d4dd02b011d848ed9fa4dd615
DIFF: https://github.com/llvm/llvm-project/commit/981932bc57aeaa6d4dd02b011d848ed9fa4dd615.diff

LOG: [MLIR] Clarify (test-scf-)parallel-loop-collapsing

1. parallel-loop-collapsing is renamed to test-scf-parallel-loop-collapsing.
2. The pass adds various checks to provide error messages instead of
   hitting assert failures.
3. Testing is added to verify these error messages

This is roughly an NFC. The name changes, but all checked behavior
previously would have resulted in an assertion failure. Almost no new
support is added, so this pass is still limited in scope to testing the
transform behaves correctly with input arguments that perfectly match
the ParallelLoop's iterator arg set. The one new piece of functionality
is that invalid operations will now be skipped with an error messages
instead of producing an assertion failure, so the pass can be used with
expected failures for pieces of the IR not cared about with a specific
RUN command.

Differential Revision: https://reviews.llvm.org/D147514

Added: 
    mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir

Modified: 
    mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
    mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
    mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
    mlir/test/Transforms/parallel-loop-collapsing.mlir
    mlir/test/Transforms/single-parallel-loop-collapsing.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
index ce76572a2822f..90b315e83a8cf 100644

--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
@@ -37,7 +37,7 @@ std::unique_ptr<Pass> createSCFForLoopCanonicalizationPass();
 
 /// Creates a pass that transforms a single ParallelLoop over N induction
 /// variables into another ParallelLoop over less than N induction variables.
-std::unique_ptr<Pass> createParallelLoopCollapsingPass();
+std::unique_ptr<Pass> createTestSCFParallelLoopCollapsingPass();
 
 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr<Pass> createParallelLoopFusionPass();

diff  --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 9abffe58a9240..16eb415c8cb61 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -50,9 +50,38 @@ def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> {
   let constructor = "mlir::createParallelLoopFusionPass()";
 }
 
-def SCFParallelLoopCollapsing : Pass<"scf-parallel-loop-collapsing"> {
-  let summary = "Collapse parallel loops to use less induction variables";
-  let constructor = "mlir::createParallelLoopCollapsingPass()";
+def TestSCFParallelLoopCollapsing : Pass<"test-scf-parallel-loop-collapsing"> {
+  let summary = "Test parallel loops collapsing transformation";
+  let constructor = "mlir::createTestSCFParallelLoopCollapsingPass()";
+  let description = [{
+      This pass is purely for testing the scf::collapseParallelLoops
+      transformation. The transformation does not have opinions on how a
+      parallel loop should be collapsed, so this pass is structured for the
+      common case on GPUs of collapsing to a 3d parallel loop. 3 lists can be
+      provided to collapsed-indices-{0,1,2} to represent how the loop should be
+      collapsed and must reference evrey iterator in the original parallel loop.
+
+    ```mlir
+    # Before:
+    scf.parallel (%arg0, %arg1)
+                 = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      "test.sink"(%5, %3) : (index, index) -> ()
+      scf.yield
+    }
+
+    # After:
+    scf.parallel (%arg0) = (%c0) to (%c4) step (%c1) {
+      %0 = arith.remsi %arg0, %c2 : index
+      %1 = arith.divsi %arg0, %c2 : index
+      %2 = arith.muli %0, %c7 : index
+      %3 = arith.addi %2, %c3 : index
+      %4 = arith.muli %1, %c7 : index
+      %5 = arith.addi %4, %c3 : index
+      "test.sink"(%5, %3) : (index, index) -> ()
+    }
+    ```
+  }];
+
   let options = [
     ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
                "Which loop indices to combine 0th loop index">,

diff  --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 34c5e777f9484..a69df025bcba8 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -11,11 +11,12 @@
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Transforms/RegionUtils.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 
 namespace mlir {
-#define GEN_PASS_DEF_SCFPARALLELLOOPCOLLAPSING
+#define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING
 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
 } // namespace mlir
 
@@ -24,27 +25,84 @@ namespace mlir {
 using namespace mlir;
 
 namespace {
-struct ParallelLoopCollapsing
-    : public impl::SCFParallelLoopCollapsingBase<ParallelLoopCollapsing> {
+struct TestSCFParallelLoopCollapsing
+    : public impl::TestSCFParallelLoopCollapsingBase<
+          TestSCFParallelLoopCollapsing> {
   void runOnOperation() override {
     Operation *module = getOperation();
 
+    // The common case for GPU dialect will be simplifying the ParallelOp to 3
+    // arguments, so we do that here to simplify things.
+    llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;
+
+    // Gather the input args into the format required by
+    // `collapseParallelLoops`.
+    if (!clCollapsedIndices0.empty())
+      combinedLoops.push_back(clCollapsedIndices0);
+    if (!clCollapsedIndices1.empty()) {
+      if (clCollapsedIndices0.empty()) {
+        llvm::errs()
+            << "collapsed-indices-1 specified but not collapsed-indices-0";
+        signalPassFailure();
+        return;
+      }
+      combinedLoops.push_back(clCollapsedIndices1);
+    }
+    if (!clCollapsedIndices2.empty()) {
+      if (clCollapsedIndices1.empty()) {
+        llvm::errs()
+            << "collapsed-indices-2 specified but not collapsed-indices-1";
+        signalPassFailure();
+        return;
+      }
+      combinedLoops.push_back(clCollapsedIndices2);
+    }
+
+    if (combinedLoops.empty()) {
+      llvm::errs() << "No collapsed-indices were specified. This pass is only "
+                      "for testing and does not automatically collapse all "
+                      "parallel loops or similar.";
+      signalPassFailure();
+      return;
+    }
+
+    // Confirm that the specified loops are [0,N) by testing that N values exist
+    // with the maximum value being N-1.
+    llvm::SmallSet<unsigned, 8> flattenedCombinedLoops;
+    unsigned maxCollapsedIndex = 0;
+    for (auto &loops : combinedLoops) {
+      for (auto &loop : loops) {
+        flattenedCombinedLoops.insert(loop);
+        maxCollapsedIndex = std::max(maxCollapsedIndex, loop);
+      }
+    }
+
+    if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 ||
+        !flattenedCombinedLoops.contains(maxCollapsedIndex)) {
+      llvm::errs()
+          << "collapsed-indices arguments must include all values [0,N).";
+      signalPassFailure();
+      return;
+    }
+
+    // Only apply the transformation on parallel loops where the specified
+    // transformation is valid, but do NOT early abort in the case of invalid
+    // loops.
     module->walk([&](scf::ParallelOp op) {
-      // The common case for GPU dialect will be simplifying the ParallelOp to 3
-      // arguments, so we do that here to simplify things.
-      llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;
-      if (!clCollapsedIndices0.empty())
-        combinedLoops.push_back(clCollapsedIndices0);
-      if (!clCollapsedIndices1.empty())
-        combinedLoops.push_back(clCollapsedIndices1);
-      if (!clCollapsedIndices2.empty())
-        combinedLoops.push_back(clCollapsedIndices2);
+      if (flattenedCombinedLoops.size() != op.getNumLoops()) {
+        op.emitOpError("has ")
+            << op.getNumLoops()
+            << " iter args while this limited functionality testing pass was "
+               "configured only for loops with exactly "
+            << flattenedCombinedLoops.size() << " iter args.";
+        return;
+      }
       collapseParallelLoops(op, combinedLoops);
     });
   }
 };
 } // namespace
 
-std::unique_ptr<Pass> mlir::createParallelLoopCollapsingPass() {
-  return std::make_unique<ParallelLoopCollapsing>();
+std::unique_ptr<Pass> mlir::createTestSCFParallelLoopCollapsingPass() {
+  return std::make_unique<TestSCFParallelLoopCollapsing>();
 }

diff  --git a/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir b/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir
new file mode 100644
index 0000000000000..6f98d2c062a25
--- /dev/null
+++ b/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir
@@ -0,0 +1,34 @@
+// First test various sets of invalid arguments
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing))' 2>&1 | FileCheck %s --check-prefix=CL0
+// CL0: No collapsed-indices were specified. This pass is only for testing and does not automatically collapse all parallel loops or similar
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-1=1}))' 2>&1 | FileCheck %s --check-prefix=CL1
+// CL1: collapsed-indices-1 specified but not collapsed-indices-0
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=1  collapsed-indices-2=2}))' 2>&1 | FileCheck %s --check-prefix=CL2
+// CL2: collapsed-indices-2 specified but not collapsed-indices-1
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=1  collapsed-indices-1=2}))' 2>&1 | FileCheck %s --check-prefix=NON-ZERO
+// NON-ZERO: collapsed-indices arguments must include all values [0,N).
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0  collapsed-indices-1=2}))' 2>&1 | FileCheck %s --check-prefix=NON-CONTIGUOUS
+// NON-CONTIGUOUS: collapsed-indices arguments must include all values [0,N).
+
+
+// Then test for invalid combinations of argument+input-ir
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}))' -verify-diagnostics
+func.func @too_few_iters(%arg0: index, %arg1: index, %arg2: index) {
+  // expected-error @+1 {{op has 1 iter args while this limited functionality testing pass was configured only for loops with exactly 2 iter args.}}
+  scf.parallel (%arg3) = (%arg0) to (%arg1) step (%arg2) {
+    scf.yield
+  }
+  return
+}
+
+func.func @too_many_iters(%arg0: index, %arg1: index, %arg2: index) {
+  // expected-error @+1 {{op has 3 iter args while this limited functionality testing pass was configured only for loops with exactly 2 iter args.}}
+  scf.parallel (%arg3, %arg4, %arg5) = (%arg0, %arg0, %arg0) to (%arg1, %arg1, %arg1) step (%arg2, %arg2, %arg2) {
+    scf.yield
+  }
+  return
+}

diff  --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
index 90c3f5d28d6af..c606fe7588526 100644
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s
 
 // CHECK-LABEL: func @parallel_many_dims() {
 func.func @parallel_many_dims() {

diff  --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
index 91cab126d8dd6..7b6883896dc10 100644
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s
 
 func.func @collapse_to_single() {
   %c0 = arith.constant 3 : index