[Mlir-commits] [mlir] 3d287a7 - [MLIR] Fix/generalize affine LICM check for side-effecting ops

Mon Jul 10 01:41:16 PDT 2023

Author: Uday Bondhugula
Date: 2023-07-10T14:08:43+05:30
New Revision: 3d287a755bed84b7a8ffb771c6b1d5895c187346

URL: https://github.com/llvm/llvm-project/commit/3d287a755bed84b7a8ffb771c6b1d5895c187346
DIFF: https://github.com/llvm/llvm-project/commit/3d287a755bed84b7a8ffb771c6b1d5895c187346.diff

LOG: [MLIR] Fix/generalize affine LICM check for side-effecting ops

Fix affine LICM so that side-effecting ops are not hoisted. The check was
previously special-cased for affine DMA ops; generalize it to use
isMemoryEffectFree.

Differential Revision: https://reviews.llvm.org/D154783

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
    mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index 3f599b060f365a..c9b7f25c545cd1 100644

--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -26,6 +26,7 @@
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Matchers.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -49,7 +50,6 @@ namespace {
 
 /// Loop invariant code motion (LICM) pass.
 /// TODO: The pass is missing zero-trip tests.
-/// TODO: Check for the presence of side effects before hoisting.
 /// TODO: This code should be removed once the new LICM pass can handle its
 ///       uses.
 struct LoopInvariantCodeMotion
@@ -92,13 +92,11 @@ static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs,
     if (!areAllOpsInTheBlockListInvariant(parOp.getLoopBody(), indVar, iterArgs,
                                           opsWithUsers, opsToHoist))
       return false;
-  } else if (isa<AffineDmaStartOp, AffineDmaWaitOp>(op)) {
-    // TODO: Support DMA ops.
-    // FIXME: This should be fixed to not special-case these affine DMA ops but
-    // instead rely on side effects.
-    return false;
-  } else if (op.getNumRegions() > 0) {
-    // We can't handle region-holding ops we don't know about.
+  } else if (!isMemoryEffectFree(&op) &&
+             !isa<AffineReadOpInterface, AffineWriteOpInterface,
+                  AffinePrefetchOp>(&op)) {
+    // Check for side-effecting ops. Affine read/write ops are handled
+    // separately below.
     return false;
   } else if (!matchPattern(&op, m_Constant())) {
     // Register op in the set of ops that have users.

diff  --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index 50bc938d0fa40c..c04d7d2053866c 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -867,4 +867,49 @@ func.func @affine_prefetch_invariant() {
   // CHECK-NEXT:   }
   // CHECK-NEXT: }
   return
-}
\ No newline at end of file
+}
+
+// Side-effecting ops shouldn't be hoisted.
+
+// CHECK-LABEL: func @side_effecting_ops
+func.func @side_effecting_ops() {
+  %cst = arith.constant 0.0 : f32
+  %m0 = memref.alloc(): memref<1x512x16x16xf32>
+  %0 = gpu.wait async
+  affine.for %arg783 = 0 to 14 {
+    affine.for %arg784 = 0 to 14 {
+      affine.parallel (%arg785) = (0) to (512) {
+        affine.for %arg786 = 0 to 1 {
+          affine.for %arg787 = 0 to 1 {
+            affine.for %arg788 = 0 to 1 {
+              %m1 = memref.alloc() : memref<1xf32, 3>
+              %m2 = memref.alloc() : memref<1xf32, 3>
+              affine.store %cst, %m1[0] : memref<1xf32, 3>
+              affine.store %cst, %m2[0] : memref<1xf32, 3>
+              %memref_2897, %asyncToken_2898 = gpu.alloc async [%0] () : memref<1x512x16x16xf32>
+              %2432 = gpu.memcpy async [%0] %memref_2897, %m0 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
+              affine.for %arg789 = 0 to 16 {
+                affine.for %arg790 = 0 to 16 {
+                  affine.store %cst, %memref_2897[0, %arg785 + %arg788, %arg789, %arg790] : memref<1x512x16x16xf32>
+                }
+              }
+              memref.dealloc %m2 : memref<1xf32, 3>
+              memref.dealloc %m1 : memref<1xf32, 3>
+              %2433 = gpu.memcpy async [%0] %m0, %memref_2897 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
+              %2434 = gpu.dealloc async [%asyncToken_2898] %memref_2897 : memref<1x512x16x16xf32>
+            }
+          }
+        }
+      }
+    }
+  }
+  // CHECK:      affine.for %{{.*}} = 0 to 1
+  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 1
+  // CHECK:          memref.alloc
+  // CHECK:          memref.alloc
+  // CHECK:          gpu.memcpy
+  // CHECK:          affine.for %{{.*}} = 0 to 16
+  // CHECK:            affine.for %{{.*}} = 0 to 16
+  // CHECK:          memref.dealloc
+  return
+}