[Mlir-commits] [mlir] 5a99b77 - [MLIR] Extend isLoopMemoryParallel to account for locally allocated memrefs

Thu Mar 3 19:50:37 PST 2022

Author: Uday Bondhugula
Date: 2022-03-04T09:16:28+05:30
New Revision: 5a99b776eb6df2757f4b806c7de8a03b324efeb3

URL: https://github.com/llvm/llvm-project/commit/5a99b776eb6df2757f4b806c7de8a03b324efeb3
DIFF: https://github.com/llvm/llvm-project/commit/5a99b776eb6df2757f4b806c7de8a03b324efeb3.diff

LOG: [MLIR] Extend isLoopMemoryParallel to account for locally allocated memrefs

Extend isLoopMemoryParallel check to include locally allocated memrefs.
This strengthens and also speeds up the dependence check used by the
utility by excluding locally allocated memrefs where appropriate.

Additional memref dialect ops can be supported exhaustively via proper
interfaces.

Reviewed By: dcaballe

Differential Revision: https://reviews.llvm.org/D120617

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
    mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
    mlir/test/Dialect/Affine/parallelize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
index 9f790575ccc3f..ac6e0a8d09448 100644

--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
@@ -53,8 +53,10 @@ bool isLoopParallel(
     SmallVectorImpl<LoopReduction> *parallelReductions = nullptr);
 
 /// Returns true if `forOp' doesn't have memory dependences preventing
-/// parallelization. This function doesn't check iter_args and should be used
-/// only as a building block for full parallel-checking functions.
+/// parallelization. Memrefs that are allocated inside `forOp` do not impact its
+/// dependences and parallelism. This function does not check iter_args (for
+/// values other than memref types) and should be used only as a building block
+/// for complete parallelism-checking functions.
 bool isLoopMemoryParallel(AffineForOp forOp);
 
 /// Returns in `affineApplyOps`, the sequence of those AffineApplyOp

diff  --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
index 8f6781b4b5ebf..648a18d4eafad 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -17,12 +17,10 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/IntegerSet.h"
-#include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseMap.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -114,18 +112,51 @@ bool mlir::isLoopParallel(AffineForOp forOp,
   return isLoopMemoryParallel(forOp);
 }
 
-/// Returns true if `forOp' doesn't have memory dependences preventing
-/// parallelization. This function doesn't check iter_args and should be used
-/// only as a building block for full parallel-checking functions.
+/// Returns true if `op` is alloc-like, i.e., has a memory `Allocate` effect.
+static bool isAllocLikeOp(Operation *op) {
+  auto memEffects = dyn_cast<MemoryEffectOpInterface>(op);
+  return memEffects && memEffects.hasEffect<MemoryEffects::Allocate>();
+}
+
+/// Returns true if `v` is the result of an alloc-like op nested within
+/// `enclosingOp`, or a view-like alias (possibly chained) of such a result.
+static bool isLocallyDefined(Value v, Operation *enclosingOp) {
+  Operation *defOp = v.getDefiningOp();
+  if (!defOp)
+    return false;
+
+  if (isAllocLikeOp(defOp) && enclosingOp->isProperAncestor(defOp))
+    return true;
+
+  // Aliasing ops: look through view-like ops to the memref they are views of.
+  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
+  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
+}
+
 bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
+  // Any memref-typed iteration arguments are treated as serializing.
+  if (llvm::any_of(forOp.getResultTypes(),
+                   [](Type type) { return type.isa<BaseMemRefType>(); }))
+    return false;
+
   // Collect all load and store ops in loop nest rooted at 'forOp'.
   SmallVector<Operation *, 8> loadAndStoreOps;
   auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
-    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
-      loadAndStoreOps.push_back(op);
-    else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
-             !MemoryEffectOpInterface::hasNoEffect(op))
+    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
+      // Memrefs that are allocated inside `forOp` need not be considered.
+      if (!isLocallyDefined(readOp.getMemRef(), forOp))
+        loadAndStoreOps.push_back(op);
+    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
+      // Filter out stores the same way as above.
+      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
+        loadAndStoreOps.push_back(op);
+    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
+               !isAllocLikeOp(op) &&
+               !MemoryEffectOpInterface::hasNoEffect(op)) {
+      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
+      // as long as they don't escape the loop (which has been checked above).
       return WalkResult::interrupt();
+    }
 
     return WalkResult::advance();
   });

diff  --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index e9ecc7b122106..c1852f3f57b58 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -269,3 +269,57 @@ func @nested_min_max(%m: memref<?xf32>, %lb0: index,
   }
   return
 }
+
+// Test in the presence of locally allocated memrefs.
+
+// CHECK: func @local_alloc
+func @local_alloc() {
+  %cst = arith.constant 0.0 : f32
+  affine.for %i = 0 to 100 {
+    %m = memref.alloc() : memref<1xf32>
+    %ma = memref.alloca() : memref<1xf32>
+    affine.store %cst, %m[0] : memref<1xf32>
+  }
+  // CHECK: affine.parallel
+  return
+}
+
+// CHECK: func @local_alloc_cast
+func @local_alloc_cast() {
+  %cst = arith.constant 0.0 : f32
+  affine.for %i = 0 to 100 {
+    %m = memref.alloc() : memref<128xf32>
+    affine.for %j = 0 to 128 {
+      affine.store %cst, %m[%j] : memref<128xf32>
+    }
+    affine.for %j = 0 to 128 {
+      affine.store %cst, %m[0] : memref<128xf32>
+    }
+    %r = memref.reinterpret_cast %m to offset: [0], sizes: [8, 16],
+           strides: [16, 1] : memref<128xf32> to memref<8x16xf32>
+    affine.for %j = 0 to 8 {
+      affine.store %cst, %r[%j, %j] : memref<8x16xf32>
+    }
+  }
+  // CHECK: affine.parallel
+  // CHECK:   affine.parallel
+  // CHECK:   }
+  // CHECK:   affine.for
+  // CHECK:   }
+  // CHECK:   affine.parallel
+  // CHECK:   }
+  // CHECK: }
+
+  return
+}
+
+// CHECK-LABEL: @iter_arg_memrefs
+func @iter_arg_memrefs(%in: memref<10xf32>) {
+  %mi = memref.alloc() : memref<f32>
+  // Loop-carried memrefs are treated as serializing the loop.
+  // CHECK: affine.for
+  %mo = affine.for %i = 0 to 10 iter_args(%m_arg = %mi) -> (memref<f32>) {
+    affine.yield %m_arg : memref<f32>
+  }
+  return
+}