[Mlir-commits] [mlir] 366d843 - [mlir][gpu] Fix bug in kernel outlining

Fri Oct 9 06:03:40 PDT 2020

Author: Stephan Herhut
Date: 2020-10-09T15:03:14+02:00
New Revision: 366d8435b41dcc01013c507681523c65cdee2180

URL: https://github.com/llvm/llvm-project/commit/366d8435b41dcc01013c507681523c65cdee2180
DIFF: https://github.com/llvm/llvm-project/commit/366d8435b41dcc01013c507681523c65cdee2180.diff

LOG: [mlir][gpu] Fix bug in kernel outlining

The updated version of kernel outlining did not handle cases correctly
where an operand of a candidate for sinking itself was defined by an operation
that is a sinking candidate. In such cases, it could happen that sunk
operations were inserted in the wrong order, breaking ssa properties.

Differential Revision: https://reviews.llvm.org/D89112

Added: 
    

Modified: 
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/test/Dialect/GPU/outlining.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 689161ed1fa2..171d50c3e0da 100644

--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -65,16 +65,18 @@ static bool isSinkingBeneficiary(Operation *op) {
 /// operation into the kernel. An operation can be sunk if doing so does not
 /// introduce new kernel arguments. Whether a value is already available in the
 /// kernel (and hence does not introduce new arguments) is checked by
-/// querying `availableValues`.
+/// querying `existingDependencies` and `availableValues`.
 /// If an operand is not yet available, we recursively check whether it can be
 /// made available by siking its defining op.
 /// Operations that are indentified for sinking are added to `beneficiaryOps` in
-/// the order the should appear in the kernel. Furthermore, `availableValues` is
-/// updated with results that will be available after sinking the identified
+/// the order they should appear in the kernel. Furthermore, `availableValues`
+/// is updated with results that will be available after sinking the identified
 /// ops.
-static bool extractBeneficiaryOps(Operation *op,
-                                  llvm::SetVector<Operation *> &beneficiaryOps,
-                                  llvm::SetVector<Value> &availableValues) {
+static bool
+extractBeneficiaryOps(Operation *op,
+                      llvm::SetVector<Value> existingDependencies,
+                      llvm::SetVector<Operation *> &beneficiaryOps,
+                      llvm::SmallPtrSetImpl<Value> &availableValues) {
   if (beneficiaryOps.count(op))
     return true;
 
@@ -85,10 +87,13 @@ static bool extractBeneficiaryOps(Operation *op,
     // It is already visisble in the kernel, keep going.
     if (availableValues.count(operand))
       continue;
-    // Else check whether it can be made available via sinking.
+    // Else check whether it can be made available via sinking or already is a
+    // dependency.
     Operation *definingOp = operand.getDefiningOp();
-    if (!definingOp ||
-        !extractBeneficiaryOps(definingOp, beneficiaryOps, availableValues))
+    if ((!definingOp ||
+         !extractBeneficiaryOps(definingOp, existingDependencies,
+                                beneficiaryOps, availableValues)) &&
+        !existingDependencies.count(operand))
       return false;
   }
   // We will sink the operation, mark its results as now available.
@@ -106,13 +111,13 @@ LogicalResult mlir::sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp) {
   llvm::SetVector<Value> sinkCandidates;
   getUsedValuesDefinedAbove(launchOpBody, sinkCandidates);
 
-  SmallVector<Value, 4> worklist(sinkCandidates.begin(), sinkCandidates.end());
   llvm::SetVector<Operation *> toBeSunk;
-  for (Value operand : worklist) {
+  llvm::SmallPtrSet<Value, 4> availableValues;
+  for (Value operand : sinkCandidates) {
     Operation *operandOp = operand.getDefiningOp();
     if (!operandOp)
       continue;
-    extractBeneficiaryOps(operandOp, toBeSunk, sinkCandidates);
+    extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues);
   }
 
   // Insert operations so that the defs get cloned before uses.

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index d43bbc2eb992..5fd8b6ce79cd 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -165,14 +165,15 @@ func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
 
 // -----
 
+// CHECK-LABEL: @multiple_uses
 func @multiple_uses(%arg0 : memref<?xf32>) {
   %c1 = constant 1 : index
   %c2 = constant 2 : index
   // CHECK: gpu.func {{.*}} {
-  // CHECK: %[[C2:.*]] = constant 2 : index
-  // CHECK: "use1"(%[[C2]], %[[C2]])
-  // CHECK: "use2"(%[[C2]])
-  // CHECK: gpu.return
+  // CHECK:   %[[C2:.*]] = constant 2 : index
+  // CHECK:   "use1"(%[[C2]], %[[C2]])
+  // CHECK:   "use2"(%[[C2]])
+  // CHECK:   gpu.return
   // CHECK: }
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
                                        %grid_z = %c1)
@@ -187,6 +188,33 @@ func @multiple_uses(%arg0 : memref<?xf32>) {
 
 // -----
 
+// CHECK-LABEL: @multiple_uses2
+func @multiple_uses2(%arg0 : memref<*xf32>) {
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %d = dim %arg0, %c2 : memref<*xf32>
+  // CHECK: gpu.func {{.*}} {
+  // CHECK:   %[[C2:.*]] = constant 2 : index
+  // CHECK:   %[[D:.*]] = dim %[[ARG:.*]], %[[C2]]
+  // CHECK:   "use1"(%[[D]])
+  // CHECK:   "use2"(%[[C2]], %[[C2]])
+  // CHECK:   "use3"(%[[ARG]])
+  // CHECK:   gpu.return
+  // CHECK: }
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
+                                       %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1,
+                                        %block_z = %c1) {
+    "use1"(%d) : (index) -> ()
+    "use2"(%c2, %c2) : (index, index) -> ()
+    "use3"(%arg0) : (memref<*xf32>) -> ()
+    gpu.terminator
+  }
+  return
+}
+
+// -----
+
 llvm.mlir.global internal @global(42 : i64) : !llvm.i64
 
 //CHECK-LABEL: @function_call