[Mlir-commits] [mlir] 2fe30a3 - [mlir] properly support min/max in affine parallelization

Tue Dec 8 01:43:44 PST 2020

Author: Alex Zinenko
Date: 2020-12-08T10:43:35+01:00
New Revision: 2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc

URL: https://github.com/llvm/llvm-project/commit/2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc
DIFF: https://github.com/llvm/llvm-project/commit/2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc.diff

LOG: [mlir] properly support min/max in affine parallelization

The existing implementation of the affine parallelization silently copies over
the lower and upper bound maps from affine.for to affine.parallel. However, the
semantics of these maps differ between these two ops: in affine.for, a max(min)
of results is taken for the lower(upper) bound; in affine.parallel, multiple
induction variables can be defined and each result corresponds to one induction
variable. Thus the existing implementation could generate invalid IR or IR that
passes the verifier but has different semantics than the original code. Fix the
parallelization utility to emit dedicated min/max operations before the
affine.parallel in such cases. Disallow parallelization if min/max would have
been in an operation without the AffineScope trait, e.g., in another loop,
since the result of these operations is not considered a valid affine dimension
identifier and may not be properly handled by the affine analyses.

Reviewed By: wsmoses

Differential Revision: https://reviews.llvm.org/D92763

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Utils/Utils.cpp
    mlir/test/Dialect/Affine/parallelize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 7892dfbc7a48..e5f5a6d8998f 100644

--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -134,11 +134,43 @@ static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
 void mlir::affineParallelize(AffineForOp forOp) {
   Location loc = forOp.getLoc();
   OpBuilder outsideBuilder(forOp);
+
+  // If a loop has a 'max' in the lower bound, emit it outside the parallel loop
+  // as it does not have implicit 'max' behavior.
+  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
+  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
+  AffineMap upperBoundMap = forOp.getUpperBoundMap();
+  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
+
+  bool needsMax = lowerBoundMap.getNumResults() > 1;
+  bool needsMin = upperBoundMap.getNumResults() > 1;
+  AffineMap identityMap;
+  if (needsMax || needsMin) {
+    if (forOp->getParentOp() &&
+        !forOp->getParentOp()->hasTrait<OpTrait::AffineScope>())
+      return;
+
+    identityMap = AffineMap::getMultiDimIdentityMap(1, loc->getContext());
+  }
+  if (needsMax) {
+    auto maxOp = outsideBuilder.create<AffineMaxOp>(loc, lowerBoundMap,
+                                                    lowerBoundOperands);
+    lowerBoundMap = identityMap;
+    lowerBoundOperands = maxOp->getResults();
+  }
+
+  // Same for the upper bound.
+  if (needsMin) {
+    auto minOp = outsideBuilder.create<AffineMinOp>(loc, upperBoundMap,
+                                                    upperBoundOperands);
+    upperBoundMap = identityMap;
+    upperBoundOperands = minOp->getResults();
+  }
+
   // Creating empty 1-D affine.parallel op.
   AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
-      loc, llvm::None, llvm::None, forOp.getLowerBoundMap(),
-      forOp.getLowerBoundOperands(), forOp.getUpperBoundMap(),
-      forOp.getUpperBoundOperands());
+      loc, llvm::None, llvm::None, lowerBoundMap, lowerBoundOperands,
+      upperBoundMap, upperBoundOperands);
   // Steal the body of the old affine for op and erase it.
   newPloop.region().takeBody(forOp.region());
   forOp.erase();

diff  --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index 8e6cb05f46a0..cbc80a092e76 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -114,3 +114,33 @@ func @non_affine_load() {
   }
   return
 }
+
+// CHECK-LABEL: for_with_minmax
+func @for_with_minmax(%m: memref<?xf32>, %lb0: index, %lb1: index,
+                      %ub0: index, %ub1: index) {
+  // CHECK: %[[lb:.*]] = affine.max
+  // CHECK: %[[ub:.*]] = affine.min
+  // CHECK: affine.parallel (%{{.*}}) = (%[[lb]]) to (%[[ub]])
+  affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %lb1)
+          to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
+    affine.load %m[%i] : memref<?xf32>
+  }
+  return
+}
+
+// CHECK-LABEL: nested_for_with_minmax
+func @nested_for_with_minmax(%m: memref<?xf32>, %lb0: index,
+                             %ub0: index, %ub1: index) {
+  // CHECK: affine.parallel
+  affine.for %j = 0 to 10 {
+    // Cannot parallelize the inner loop because we would need to compute
+    // affine.max for its lower bound inside the loop, and that is not (yet)
+    // considered as a valid affine dimension.
+    // CHECK: affine.for
+    affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %j)
+            to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) {
+      affine.load %m[%i] : memref<?xf32>
+    }
+  }
+  return
+}