[Mlir-commits] [mlir] 1d541bd - [mlir][affine] Support affine.parallel in the index set analysis
Kai Sasaki
llvmlistbot at llvm.org
Sun Dec 4 03:50:58 PST 2022
Author: Kai Sasaki
Date: 2022-12-04T20:36:48+09:00
New Revision: 1d541bd92044432e98c58e2d291c9776516b42f9
URL: https://github.com/llvm/llvm-project/commit/1d541bd92044432e98c58e2d291c9776516b42f9
DIFF: https://github.com/llvm/llvm-project/commit/1d541bd92044432e98c58e2d291c9776516b42f9.diff
LOG: [mlir][affine] Support affine.parallel in the index set analysis
Support affine.parallel in the index set analysis. This allows dependence analysis on op nests that contain affine.parallel in addition to affine.for and affine.if. This change only supports constant lower/upper bounds on affine.parallel; more complex affine map bounds will be supported in follow-up commits.
See https://github.com/llvm/llvm-project/issues/57327
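For illustration, the index set of a nest like the one below (a minimal sketch adapted from the scalrep.mlir test added in this change; the value names are illustrative) can now be computed, which enables store-to-load forwarding under affine.parallel, while checkMemrefAccessDependence still conservatively reports Failure for accesses nested in affine.parallel:

  %cf7 = arith.constant 7.0 : f32
  %m = memref.alloc() : memref<10xf32>
  // Constant lower/upper bounds (0 and 10), the case supported by this change.
  affine.parallel (%i0) = (0) to (10) {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    %v0 = affine.load %m[%i0] : memref<10xf32>
  }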
Reviewed By: bondhugula
Differential Revision: https://reviews.llvm.org/D136056
Added:
Modified:
mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
mlir/lib/Dialect/Affine/IR/AffineOps.cpp
mlir/test/Dialect/Affine/scalrep.mlir
mlir/test/Transforms/memref-dependence-check.mlir
mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
index 29c059fbd1223..d6cd025d007d4 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
@@ -24,6 +24,7 @@ namespace mlir {
class AffineCondition;
class AffineForOp;
class AffineIfOp;
+class AffineParallelOp;
class AffineMap;
class AffineValueMap;
class IntegerSet;
@@ -141,6 +142,13 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron {
// TODO: add support for non-unit strides.
LogicalResult addAffineForOpDomain(AffineForOp forOp);
+ /// Add constraints (lower and upper bounds) for the specified
+ /// 'affine.parallel' operation's Value using IR information stored in its
+ /// bound maps. Returns failure for the yet unimplemented/unsupported cases.
+ /// Asserts if the Value corresponding to the 'affine.parallel' operation
+ /// isn't found in the constraint system.
+ LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp);
+
/// Adds constraints (lower and upper bounds) for each loop in the loop nest
/// described by the bound maps `lbMaps` and `ubMaps` of a computation slice.
/// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index 686a51a9f1dd5..e08b206be30c2 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -450,6 +450,11 @@ AffineForOp getForInductionVarOwner(Value val);
void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
SmallVectorImpl<Value> *ivs);
+/// Extracts the induction variables from a list of either AffineForOp or
+/// AffineParallelOp and places them in the output argument `ivs`.
+void extractInductionVars(ArrayRef<Operation *> affineOps,
+ SmallVectorImpl<Value> &ivs);
+
/// Builds a perfect nest of affine.for loops, i.e., each loop except the
/// innermost one contains only another loop and a terminator. The loops iterate
/// from "lbs" to "ubs" with "steps". The body of the innermost loop is
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
index 0fddcd72dff49..82ffbbcf2ccd4 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -240,27 +240,36 @@ void mlir::getReachableAffineApplyOps(
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
FlatAffineValueConstraints *domain) {
SmallVector<Value, 4> indices;
- SmallVector<AffineForOp, 8> forOps;
+ SmallVector<Operation *, 8> loopOps;
+ size_t numDims = 0;
for (Operation *op : ops) {
- if (!isa<AffineForOp, AffineIfOp>(op)) {
- // TODO: Support affine.parallel ops.
- LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops");
+ if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
+ LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
+ "parallel ops");
return failure();
}
- if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
- forOps.push_back(forOp);
+ if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
+ loopOps.push_back(forOp);
+ // An AffineForOp has only one induction variable.
+ numDims += 1;
+ } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
+ loopOps.push_back(parallelOp);
+ numDims += parallelOp.getNumDims();
+ }
}
- extractForInductionVars(forOps, &indices);
- // Reset while associated Values in 'indices' to the domain.
- domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
+ extractInductionVars(loopOps, indices);
+ // Reset while associating Values in 'indices' to the domain.
+ domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices);
for (Operation *op : ops) {
// Add constraints from forOp's bounds.
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
if (failed(domain->addAffineForOpDomain(forOp)))
return failure();
- } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
+ } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
domain->addAffineIfOpDomain(ifOp);
- }
+ } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+ if (failed(domain->addAffineParallelOpDomain(parallelOp)))
+ return failure();
}
return success();
}
@@ -594,6 +603,12 @@ DependenceResult mlir::checkMemrefAccessDependence(
if (srcAccess.memref != dstAccess.memref)
return DependenceResult::NoDependence;
+ // TODO: Support affine.parallel, which does not specify an ordering of its iterations.
+ auto srcParent = srcAccess.opInst->getParentOfType<AffineParallelOp>();
+ auto dstParent = dstAccess.opInst->getParentOfType<AffineParallelOp>();
+ if (srcParent || dstParent)
+ return DependenceResult::Failure;
+
// Return 'NoDependence' if one of these accesses is not an
// AffineWriteOpInterface.
if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
index 5c284c1b08e2c..15b50a4f0ae6a 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -639,6 +639,33 @@ FlatAffineValueConstraints::addAffineForOpDomain(AffineForOp forOp) {
forOp.getUpperBoundOperands());
}
+LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain(
+ AffineParallelOp parallelOp) {
+ size_t ivPos = 0;
+ for (auto iv : parallelOp.getIVs()) {
+ unsigned pos;
+ if (!findVar(iv, &pos)) {
+ assert(false && "variable expected for the IV value");
+ return failure();
+ }
+
+ AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos);
+ if (lowerBound.isConstant())
+ addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult());
+ else if (failed(addBound(BoundType::LB, pos, lowerBound,
+ parallelOp.getLowerBoundsOperands())))
+ return failure();
+
+ auto upperBound = parallelOp.getUpperBoundMap(ivPos);
+ if (upperBound.isConstant())
+ addBound(BoundType::UB, pos, upperBound.getSingleConstantResult());
+ else if (failed(addBound(BoundType::UB, pos, upperBound,
+ parallelOp.getUpperBoundsOperands())))
+ return failure();
+ ++ivPos;
+ }
+ return success();
+}
+
LogicalResult
FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
ArrayRef<AffineMap> ubMaps,
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index d7d21c8df6bd3..ffa354f7b52bd 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2318,6 +2318,19 @@ void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
ivs->push_back(forInst.getInductionVar());
}
+void mlir::extractInductionVars(ArrayRef<mlir::Operation *> affineOps,
+ SmallVectorImpl<mlir::Value> &ivs) {
+ ivs.reserve(affineOps.size());
+ for (Operation *op : affineOps) {
+ // Collect the single IV of an affine.for or all IVs of an affine.parallel.
+ if (auto forOp = dyn_cast<AffineForOp>(op))
+ ivs.push_back(forOp.getInductionVar());
+ else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
+ for (size_t i = 0; i < parallelOp.getBody()->getNumArguments(); i++)
+ ivs.push_back(parallelOp.getBody()->getArgument(i));
+ }
+}
+
/// Builds an affine loop nest, using "loopCreatorFn" to create individual loop
/// operations.
template <typename BoundListTy, typename LoopCreatorTy>
diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir
index f220ef6502b42..972133a4b4a01 100644
--- a/mlir/test/Dialect/Affine/scalrep.mlir
+++ b/mlir/test/Dialect/Affine/scalrep.mlir
@@ -788,3 +788,61 @@ func.func @no_forwarding_across_scopes() -> memref<1xf32> {
}
return %A : memref<1xf32>
}
+
+// CHECK-LABEL: func @parallel_store_load() {
+func.func @parallel_store_load() {
+ %cf7 = arith.constant 7.0 : f32
+ %m = memref.alloc() : memref<10xf32>
+ affine.parallel (%i0) = (0) to (10) {
+ affine.store %cf7, %m[%i0] : memref<10xf32>
+ %v0 = affine.load %m[%i0] : memref<10xf32>
+ %v1 = arith.addf %v0, %v0 : f32
+ }
+ memref.dealloc %m : memref<10xf32>
+ return
+// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+}
+
+func.func @non_constant_parallel_store_load(%N : index) {
+ %cf7 = arith.constant 7.0 : f32
+ %m = memref.alloc() : memref<10xf32>
+ affine.parallel (%i0) = (0) to (%N) {
+ affine.store %cf7, %m[%i0] : memref<10xf32>
+ %v0 = affine.load %m[%i0] : memref<10xf32>
+ %v1 = arith.addf %v0, %v0 : f32
+ }
+ memref.dealloc %m : memref<10xf32>
+ return
+}
+// CHECK: func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) {
+// CHECK-NEXT: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) {
+// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+
+// CHECK-LABEL: func @parallel_surrounding_for() {
+func.func @parallel_surrounding_for() {
+ %cf7 = arith.constant 7.0 : f32
+ %m = memref.alloc() : memref<10x10xf32>
+ affine.parallel (%i0) = (0) to (10) {
+ affine.for %i1 = 0 to 10 {
+ affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32>
+ %v0 = affine.load %m[%i0,%i1] : memref<10x10xf32>
+ %v1 = arith.addf %v0, %v0 : f32
+ }
+ }
+ memref.dealloc %m : memref<10x10xf32>
+ return
+// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) {
+// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
+// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+}
diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir
index 3eb5b70b7ac64..2e28a4cc7e312 100644
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@@ -1064,3 +1064,19 @@ func.func @test_interleaved_affine_for_if() {
return
}
+
+// -----
+// CHECK-LABEL: func @parallel_dependence_check_failure() {
+func.func @parallel_dependence_check_failure() {
+ %0 = memref.alloc() : memref<10xf32>
+ %cst = arith.constant 7.000000e+00 : f32
+ affine.parallel (%i0) = (0) to (10) {
+ // expected-error @+1 {{dependence check failed}}
+ affine.store %cst, %0[%i0] : memref<10xf32>
+ }
+ affine.parallel (%i1) = (0) to (10) {
+ // expected-error @+1 {{dependence check failed}}
+ %1 = affine.load %0[%i1] : memref<10xf32>
+ }
+ return
+}
diff --git a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp
index a45e21afb5761..d1bcb8dabeb85 100644
--- a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp
+++ b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp
@@ -86,15 +86,18 @@ static void checkDependences(ArrayRef<Operation *> loadsAndStores) {
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, d, &dependenceConstraints,
&dependenceComponents);
- assert(result.value != DependenceResult::Failure);
- bool ret = hasDependence(result);
- // TODO: Print dependence type (i.e. RAW, etc) and print
- // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
- // vectors from ([1, 1], [3, 3]) to (1, 3).
- srcOpInst->emitRemark("dependence from ")
- << i << " to " << j << " at depth " << d << " = "
- << getDirectionVectorStr(ret, numCommonLoops, d,
- dependenceComponents);
+ if (result.value == DependenceResult::Failure) {
+ srcOpInst->emitError("dependence check failed");
+ } else {
+ bool ret = hasDependence(result);
+ // TODO: Print dependence type (i.e. RAW, etc) and print
+ // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance
+ // vectors from ([1, 1], [3, 3]) to (1, 3).
+ srcOpInst->emitRemark("dependence from ")
+ << i << " to " << j << " at depth " << d << " = "
+ << getDirectionVectorStr(ret, numCommonLoops, d,
+ dependenceComponents);
+ }
}
}
}