[Mlir-commits] [mlir] [MLIR][SCF] Add checks to verify that the pipeliner schedule is correct. (PR #77083)

Thomas Raoux llvmlistbot at llvm.org
Tue Jan 9 08:18:23 PST 2024


https://github.com/ThomasRaoux updated https://github.com/llvm/llvm-project/pull/77083

>From 1bf6958294156f90b3b07bca54da1977e1d1620f Mon Sep 17 00:00:00 2001
From: Thomas Raoux <thomas.raoux at openai.com>
Date: Thu, 4 Jan 2024 03:00:49 -0800
Subject: [PATCH 1/4] [MLIR][SCF] Add checks to verify that the pipeliner
 schedule is correct.

Add a check to validate that the schedule passed to the pipeliner transformation
is valid and won't cause the pipeliner to break SSA.

This checks that the for each operation in the loop operations are scheduled
after their operands.
---
 .../Dialect/SCF/Transforms/LoopPipelining.cpp | 42 +++++++++++++++++++
 mlir/test/Dialect/SCF/loop-pipelining.mlir    | 34 ++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
index 7d45b484f76575..4de5a495c9290a 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
@@ -67,6 +67,10 @@ struct LoopPipelinerInternal {
   /// the Value.
   std::pair<Operation *, int64_t> getDefiningOpAndDistance(Value value);
 
+  /// Return true if the schedule is possible and return false otherwise. A
+  /// schedule is correct if all definitions are scheduled before uses.
+  bool verifySchedule();
+
 public:
   /// Initalize the information for the given `op`, return true if it
   /// satisfies the pre-condition to apply pipelining.
@@ -156,6 +160,11 @@ bool LoopPipelinerInternal::initializeLoopInfo(
     }
   }
 
+  if (!verifySchedule()) {
+    LDBG("--invalid schedule: " << op << " -> BAIL");
+    return false;
+  }
+
   // Currently, we do not support assigning stages to ops in nested regions. The
   // block of all operations assigned a stage should be the single `scf.for`
   // body block.
@@ -330,6 +339,39 @@ LoopPipelinerInternal::getDefiningOpAndDistance(Value value) {
   return {def, distance};
 }
 
+/// Compute unrolled cycles of each op and verify that each op is scheduled
+/// after its operands (modulo the distance between producer and consumer).
+bool LoopPipelinerInternal::verifySchedule() {
+  int64_t numCylesPerIter = opOrder.size();
+  // Pre-compute the unrolled cycle of each op.
+  DenseMap<Operation *, int64_t> unrolledCyles;
+  for (int64_t cycle = 0; cycle < numCylesPerIter; cycle++) {
+    Operation *def = opOrder[cycle];
+    auto it = stages.find(def);
+    assert(it != stages.end());
+    int64_t stage = it->second;
+    unrolledCyles[def] = cycle + stage * numCylesPerIter;
+  }
+  for (Operation *consumer : opOrder) {
+    int64_t consumerCycle = unrolledCyles[consumer];
+    for (Value operand : consumer->getOperands()) {
+      auto [producer, distance] = getDefiningOpAndDistance(operand);
+      if (!producer)
+        continue;
+      auto it = unrolledCyles.find(producer);
+      // Skip producer coming from outside the loop.
+      if (it == unrolledCyles.end())
+        continue;
+      int64_t producerCycle = it->second;
+      if (consumerCycle < producerCycle - numCylesPerIter * distance) {
+        consumer->emitError("operation scheduled before its operands.");
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 scf::ForOp LoopPipelinerInternal::createKernelLoop(
     const llvm::MapVector<Value, LoopPipelinerInternal::LiverangeInfo>
         &crossStageValues,
diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir
index 33290d2db31d66..694c0c321e6b36 100644
--- a/mlir/test/Dialect/SCF/loop-pipelining.mlir
+++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-scf-pipelining -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -test-scf-pipelining -split-input-file -verify-diagnostics | FileCheck %s
 // RUN: mlir-opt %s -test-scf-pipelining=annotate -split-input-file | FileCheck %s --check-prefix ANNOTATE
 // RUN: mlir-opt %s -test-scf-pipelining=no-epilogue-peeling -split-input-file | FileCheck %s --check-prefix NOEPILOGUE
 
@@ -814,3 +814,35 @@ func.func @yield_constant_loop(%A: memref<?xf32>) -> f32 {
   return %r : f32
 }
 
+// -----
+
+func.func @invalid_schedule(%A: memref<?xf32>, %result: memref<?xf32>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %cf = arith.constant 1.0 : f32
+  scf.for %i0 = %c0 to %c4 step %c1 {
+    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
+    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
+    // expected-error at +1 {{operation scheduled before its operands.}}
+    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32>
+  }  { __test_pipelining_loop__ }
+  return
+}
+
+// -----
+
+func.func @invalid_schedule2(%A: memref<?xf32>, %result: memref<?xf32>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %cf = arith.constant 1.0 : f32
+  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%idx = %c0) -> (index) {
+    // expected-error at +1 {{operation scheduled before its operands.}}
+    %A_elem = memref.load %A[%idx] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : memref<?xf32>
+    %idx1 = arith.addi %idx, %c1 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : index
+    memref.store %A_elem, %result[%idx] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 2 } : memref<?xf32>
+    scf.yield %idx1 : index
+  }  { __test_pipelining_loop__ }
+  return
+}

>From 7286bb486e2ba89519c86ea689b6e378970e08e8 Mon Sep 17 00:00:00 2001
From: Thomas Raoux <thomas.raoux at openai.com>
Date: Tue, 9 Jan 2024 06:07:15 -0800
Subject: [PATCH 2/4] Update mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp

Co-authored-by: Rik Huijzer <github at huijzer.xyz>
---
 mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
index 4de5a495c9290a..ee48d057b6e85e 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
@@ -339,8 +339,9 @@ LoopPipelinerInternal::getDefiningOpAndDistance(Value value) {
   return {def, distance};
 }
 
-/// Compute unrolled cycles of each op and verify that each op is scheduled
-/// after its operands (modulo the distance between producer and consumer).
+/// Compute unrolled cycles of each op (consumer) and verify that each op is
+/// scheduled after its operands (producers) while adjusting for the distance
+/// between producer and consumer.
 bool LoopPipelinerInternal::verifySchedule() {
   int64_t numCylesPerIter = opOrder.size();
   // Pre-compute the unrolled cycle of each op.

>From 57a3016e0ffcc472e00d8c3c317f25a212929f50 Mon Sep 17 00:00:00 2001
From: Thomas Raoux <thomas.raoux at openai.com>
Date: Tue, 9 Jan 2024 06:13:27 -0800
Subject: [PATCH 3/4] Address review comments

---
 .../Dialect/SCF/Transforms/LoopPipelining.cpp | 68 +++++++++----------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
index ee48d057b6e85e..9eda1a4597ba43 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
@@ -203,6 +203,40 @@ bool LoopPipelinerInternal::initializeLoopInfo(
   return true;
 }
 
+/// Compute unrolled cycles of each op (consumer) and verify that each op is
+/// scheduled after its operands (producers) while adjusting for the distance
+/// between producer and consumer.
+bool LoopPipelinerInternal::verifySchedule() {
+  int64_t numCylesPerIter = opOrder.size();
+  // Pre-compute the unrolled cycle of each op.
+  DenseMap<Operation *, int64_t> unrolledCyles;
+  for (int64_t cycle = 0; cycle < numCylesPerIter; cycle++) {
+    Operation *def = opOrder[cycle];
+    auto it = stages.find(def);
+    assert(it != stages.end());
+    int64_t stage = it->second;
+    unrolledCyles[def] = cycle + stage * numCylesPerIter;
+  }
+  for (Operation *consumer : opOrder) {
+    int64_t consumerCycle = unrolledCyles[consumer];
+    for (Value operand : consumer->getOperands()) {
+      auto [producer, distance] = getDefiningOpAndDistance(operand);
+      if (!producer)
+        continue;
+      auto it = unrolledCyles.find(producer);
+      // Skip producer coming from outside the loop.
+      if (it == unrolledCyles.end())
+        continue;
+      int64_t producerCycle = it->second;
+      if (consumerCycle < producerCycle - numCylesPerIter * distance) {
+        consumer->emitError("operation scheduled before its operands");
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 /// Clone `op` and call `callback` on the cloned op's oeprands as well as any
 /// operands of nested ops that:
 /// 1) aren't defined within the new op or
@@ -339,40 +373,6 @@ LoopPipelinerInternal::getDefiningOpAndDistance(Value value) {
   return {def, distance};
 }
 
-/// Compute unrolled cycles of each op (consumer) and verify that each op is
-/// scheduled after its operands (producers) while adjusting for the distance
-/// between producer and consumer.
-bool LoopPipelinerInternal::verifySchedule() {
-  int64_t numCylesPerIter = opOrder.size();
-  // Pre-compute the unrolled cycle of each op.
-  DenseMap<Operation *, int64_t> unrolledCyles;
-  for (int64_t cycle = 0; cycle < numCylesPerIter; cycle++) {
-    Operation *def = opOrder[cycle];
-    auto it = stages.find(def);
-    assert(it != stages.end());
-    int64_t stage = it->second;
-    unrolledCyles[def] = cycle + stage * numCylesPerIter;
-  }
-  for (Operation *consumer : opOrder) {
-    int64_t consumerCycle = unrolledCyles[consumer];
-    for (Value operand : consumer->getOperands()) {
-      auto [producer, distance] = getDefiningOpAndDistance(operand);
-      if (!producer)
-        continue;
-      auto it = unrolledCyles.find(producer);
-      // Skip producer coming from outside the loop.
-      if (it == unrolledCyles.end())
-        continue;
-      int64_t producerCycle = it->second;
-      if (consumerCycle < producerCycle - numCylesPerIter * distance) {
-        consumer->emitError("operation scheduled before its operands.");
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 scf::ForOp LoopPipelinerInternal::createKernelLoop(
     const llvm::MapVector<Value, LoopPipelinerInternal::LiverangeInfo>
         &crossStageValues,

>From 8db5758c3dde3fe708332651def1dbd2c08fdc9a Mon Sep 17 00:00:00 2001
From: Thomas Raoux <thomas.raoux at openai.com>
Date: Tue, 9 Jan 2024 08:17:57 -0800
Subject: [PATCH 4/4] update test

---
 mlir/test/Dialect/SCF/loop-pipelining.mlir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir
index 694c0c321e6b36..8d6f454d187518 100644
--- a/mlir/test/Dialect/SCF/loop-pipelining.mlir
+++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir
@@ -824,7 +824,7 @@ func.func @invalid_schedule(%A: memref<?xf32>, %result: memref<?xf32>) {
   scf.for %i0 = %c0 to %c4 step %c1 {
     %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
     %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
-    // expected-error at +1 {{operation scheduled before its operands.}}
+    // expected-error at +1 {{operation scheduled before its operands}}
     memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32>
   }  { __test_pipelining_loop__ }
   return
@@ -838,7 +838,7 @@ func.func @invalid_schedule2(%A: memref<?xf32>, %result: memref<?xf32>) {
   %c4 = arith.constant 4 : index
   %cf = arith.constant 1.0 : f32
   %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%idx = %c0) -> (index) {
-    // expected-error at +1 {{operation scheduled before its operands.}}
+    // expected-error at +1 {{operation scheduled before its operands}}
     %A_elem = memref.load %A[%idx] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : memref<?xf32>
     %idx1 = arith.addi %idx, %c1 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : index
     memref.store %A_elem, %result[%idx] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 2 } : memref<?xf32>



More information about the Mlir-commits mailing list