[Mlir-commits] [mlir] 2d2d696 - [MLIR] Propagate input side effect information

Tres Popp llvmlistbot at llvm.org
Mon Apr 27 02:38:51 PDT 2020


Author: Tres Popp
Date: 2020-04-27T11:35:52+02:00
New Revision: 2d2d696137d729dcd2d83330e4d3b9e305f96640

URL: https://github.com/llvm/llvm-project/commit/2d2d696137d729dcd2d83330e4d3b9e305f96640
DIFF: https://github.com/llvm/llvm-project/commit/2d2d696137d729dcd2d83330e4d3b9e305f96640.diff

LOG: [MLIR] Propagate input side effect information

Summary:
Previously, operations like std.load generated methods for obtaining their
effects but did not inherit from the SideEffect interfaces when their
parameters were decorated with that information. As a result, passes had
no information on the side effects of std.load/store and had to treat
them conservatively. This change adds the interface inheritance when
generating those methods.

As a side effect, many tests are modified, as they were using std.load
only for testing and this operation would now be folded away as part of
pattern rewriting. The tests are modified to store the loaded value or to
return the result of the std.load.
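
To make the mechanism concrete, here is a small self-contained C++ sketch
(toy types, not actual MLIR code) of why the base-class list matters: a
pass discovers effect information through the traits a generated op class
inherits from, so a class that merely defines the query methods still
looks opaque to analyses:

#include <iostream>
#include <type_traits>

// Stand-in for MemoryEffectOpInterface::Trait<ConcreteOp>.
template <typename ConcreteOp>
struct EffectInterfaceTrait {};

// Before this patch: the generated class had the method but not the trait.
struct LoadOpBefore {
  void getEffects() {}
};

// After this patch: the interface trait is part of the base-class list.
struct LoadOpAfter : EffectInterfaceTrait<LoadOpAfter> {
  void getEffects() {}
};

template <typename Op>
constexpr bool hasEffectInterface =
    std::is_base_of_v<EffectInterfaceTrait<Op>, Op>;

int main() {
  std::cout << hasEffectInterface<LoadOpBefore> << '\n'; // 0: conservative
  std::cout << hasEffectInterface<LoadOpAfter> << '\n';  // 1: queryable
}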

Reviewers: mravishankar, antiagainst, nicolasvasilache, herhut, aartbik, ftynse!

Subscribers: mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, csigg, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, bader, grosul1, frgossen, Kayjukh, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78802

Added: 
    

Modified: 
    mlir/include/mlir/Interfaces/SideEffects.td
    mlir/include/mlir/Support/LLVM.h
    mlir/include/mlir/TableGen/OpClass.h
    mlir/include/mlir/TableGen/SideEffects.h
    mlir/lib/TableGen/OpClass.cpp
    mlir/lib/TableGen/SideEffects.cpp
    mlir/test/Conversion/StandardToSPIRV/legalization.mlir
    mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
    mlir/test/Dialect/Affine/canonicalize.mlir
    mlir/test/Dialect/GPU/all-reduce-max.mlir
    mlir/test/Dialect/GPU/all-reduce.mlir
    mlir/test/Dialect/Linalg/loops.mlir
    mlir/test/Dialect/Linalg/parallel_loops.mlir
    mlir/test/Transforms/canonicalize.mlir
    mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Interfaces/SideEffects.td b/mlir/include/mlir/Interfaces/SideEffects.td
index 8bb40728d7d1..4433f46706f4 100644
--- a/mlir/include/mlir/Interfaces/SideEffects.td
+++ b/mlir/include/mlir/Interfaces/SideEffects.td
@@ -118,6 +118,9 @@ class SideEffect<EffectOpInterfaceBase interface, string effectName,
   /// The name of the base effects class.
   string baseEffectName = interface.baseEffectName;
 
+  /// The parent interface that the effect belongs to.
+  string interfaceTrait = interface.trait;
+
   /// The derived effect that is being applied.
   string effect = effectName;
 

diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h
index 8d7dfc0b7f20..1887188bf3e2 100644
--- a/mlir/include/mlir/Support/LLVM.h
+++ b/mlir/include/mlir/Support/LLVM.h
@@ -49,6 +49,9 @@ class DenseMap;
 template <typename Fn> class function_ref;
 template <typename IteratorT> class iterator_range;
 template <typename T, typename ResultT> class TypeSwitch;
+class MallocAllocator;
+template <typename AllocatorTy>
+class StringSet;
 
 // Other common classes.
 class raw_ostream;
@@ -74,6 +77,8 @@ template <typename KeyT, typename ValueT,
 using DenseMap = llvm::DenseMap<KeyT, ValueT, KeyInfoT, BucketT>;
 template <typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT>>
 using DenseSet = llvm::DenseSet<ValueT, ValueInfoT>;
+template <typename AllocatorTy = llvm::MallocAllocator>
+using StringSet = llvm::StringSet<AllocatorTy>;
 template <typename Fn> using function_ref = llvm::function_ref<Fn>;
 using llvm::iterator_range;
 using llvm::MutableArrayRef;
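
The hunk above follows MLIR's usual forward-declare-then-alias pattern:
default template arguments are stated on the alias so the forward
declaration can stay minimal. A toy, self-contained version of the same
pattern, with illustrative names rather than the real LLVM headers:

#include <set>
#include <string>

namespace toyllvm {
class MallocAllocator {};
template <typename AllocatorTy> // unused in this toy, like a real allocator tag
class StringSet {
public:
  // Returns true if the string was not already present.
  bool insert(const std::string &s) { return impl.insert(s).second; }
private:
  std::set<std::string> impl;
};
} // namespace toyllvm

namespace toymlir {
// Re-export into the project namespace, restating the default argument,
// mirroring the change to mlir/include/mlir/Support/LLVM.h.
template <typename AllocatorTy = toyllvm::MallocAllocator>
using StringSet = toyllvm::StringSet<AllocatorTy>;
} // namespace toymlir

int main() {
  toymlir::StringSet<> set;
  return set.insert("x") ? 0 : 1;
}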

diff --git a/mlir/include/mlir/TableGen/OpClass.h b/mlir/include/mlir/TableGen/OpClass.h
index ced40fb1589e..8788a505a4b3 100644
--- a/mlir/include/mlir/TableGen/OpClass.h
+++ b/mlir/include/mlir/TableGen/OpClass.h
@@ -26,6 +26,7 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 
 #include <string>
 
@@ -157,7 +158,8 @@ class OpClass : public Class {
 
 private:
   StringRef extraClassDeclaration;
-  SmallVector<std::string, 4> traits;
+  SmallVector<std::string, 4> traitsVec;
+  StringSet<> traitsSet;
   bool hasOperandAdaptor;
 };
 

diff --git a/mlir/include/mlir/TableGen/SideEffects.h b/mlir/include/mlir/TableGen/SideEffects.h
index eb2a06803d0f..468010515252 100644
--- a/mlir/include/mlir/TableGen/SideEffects.h
+++ b/mlir/include/mlir/TableGen/SideEffects.h
@@ -29,6 +29,9 @@ class SideEffect : public Operator::VariableDecorator {
   // Return the name of the base C++ effect.
   StringRef getBaseEffectName() const;
 
+  // Return the name of the Interface that the effect belongs to.
+  StringRef getInterfaceTrait() const;
+
   // Return the name of the resource class.
   StringRef getResource() const;
 

diff --git a/mlir/lib/TableGen/OpClass.cpp b/mlir/lib/TableGen/OpClass.cpp
index 4fb21fcddf3e..26519df72534 100644
--- a/mlir/lib/TableGen/OpClass.cpp
+++ b/mlir/lib/TableGen/OpClass.cpp
@@ -195,12 +195,15 @@ void tblgen::OpClass::setHasOperandAdaptorClass(bool has) {
   hasOperandAdaptor = has;
 }
 
-// Adds the given trait to this op.
-void tblgen::OpClass::addTrait(Twine trait) { traits.push_back(trait.str()); }
+void tblgen::OpClass::addTrait(Twine trait) {
+  auto traitStr = trait.str();
+  if (traitsSet.insert(traitStr).second)
+    traitsVec.push_back(std::move(traitStr));
+}
 
 void tblgen::OpClass::writeDeclTo(raw_ostream &os) const {
   os << "class " << className << " : public Op<" << className;
-  for (const auto &trait : traits)
+  for (const auto &trait : traitsVec)
     os << ", " << trait;
   os << "> {\npublic:\n";
   os << "  using Op::Op;\n";
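
The new addTrait uses the set-guards-vector idiom: the set answers "seen
before?" while the vector preserves first-insertion order, so the emitted
base-class list is deterministic and free of duplicate bases (a repeated
direct base class would be ill-formed C++). The same idea with standard
containers, as a runnable sketch (the real code uses llvm::StringSet):

#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

class TraitList {
public:
  void addTrait(std::string trait) {
    // insert() reports whether the element was newly added.
    if (seen.insert(trait).second)
      ordered.push_back(std::move(trait));
  }
  void print() const {
    for (const auto &t : ordered)
      std::cout << t << '\n';
  }
private:
  std::unordered_set<std::string> seen;
  std::vector<std::string> ordered;
};

int main() {
  TraitList traits;
  traits.addTrait("MemoryEffectOpInterface::Trait");
  traits.addTrait("MemoryEffectOpInterface::Trait"); // duplicate, dropped
  traits.print(); // printed once
}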

diff --git a/mlir/lib/TableGen/SideEffects.cpp b/mlir/lib/TableGen/SideEffects.cpp
index 7fbeffaafbdb..5d4f68581cca 100644
--- a/mlir/lib/TableGen/SideEffects.cpp
+++ b/mlir/lib/TableGen/SideEffects.cpp
@@ -24,6 +24,10 @@ StringRef SideEffect::getBaseEffectName() const {
   return def->getValueAsString("baseEffectName");
 }
 
+StringRef SideEffect::getInterfaceTrait() const {
+  return def->getValueAsString("interfaceTrait");
+}
+
 StringRef SideEffect::getResource() const {
   auto value = def->getValueAsString("resource");
   return value.empty() ? "::mlir::SideEffects::DefaultResource" : value;

diff --git a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
index d81036edf67d..3540a101c55b 100644
--- a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
+++ b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
@@ -2,7 +2,7 @@
 
 // CHECK-LABEL: @fold_static_stride_subview_with_load
 // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index
-func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) {
+func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> f32 {
   // CHECK-NOT: subview
   // CHECK: [[C2:%.*]] = constant 2 : index
   // CHECK: [[C3:%.*]] = constant 3 : index
@@ -13,12 +13,12 @@ func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : in
   // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
   %0 = subview %arg0[%arg1, %arg2][][] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]>
   %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
-  return
+  return %1 : f32
 }
 
 // CHECK-LABEL: @fold_dynamic_stride_subview_with_load
 // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index
-func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) {
+func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> f32 {
   // CHECK-NOT: subview
   // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[ARG5]] : index
   // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index
@@ -27,7 +27,7 @@ func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : i
   // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
   %0 = subview %arg0[%arg1, %arg2][][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [?, ?]>
   %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]>
-  return
+  return %1 : f32
 }
 
 // CHECK-LABEL: @fold_static_stride_subview_with_store

diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
index 87004c2b61c8..c601cf98dcd6 100644
--- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
@@ -64,7 +64,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT:      affine.for %[[I2:.*]] = 0 to %{{.*}} {
   // CHECK-NEXT:        affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
   //      CHECK:          %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
-  // CHECK-NEXT:          %[[VECTOR_VIEW:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4x3xf32>
   // CHECK-NEXT:          loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
   // CHECK-NEXT:            loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
   // CHECK-NEXT:              loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
@@ -99,7 +98,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-NEXT:              }
   // CHECK-NEXT:            }
   // CHECK-NEXT:          }
-  //      CHECK:          {{.*}} = load %[[VECTOR_VIEW]][] : memref<vector<5x4x3xf32>>
   // CHECK-NEXT:          dealloc %[[ALLOC]] : memref<5x4x3xf32>
   // CHECK-NEXT:        }
   // CHECK-NEXT:      }

diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index 90437ac7ce5f..fc4fcd45231e 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -54,30 +54,30 @@ func @compose_affine_maps_1dto2d_no_symbols() {
     %x1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %x0)
 
     // CHECK: [[I0A:%[0-9]+]] = affine.apply [[MAP0]](%{{.*}})
-    // CHECK-NEXT: load %0{{\[}}[[I0A]], [[I0A]]{{\]}}
+    // CHECK-NEXT: [[V0:%[0-9]+]] = load %0{{\[}}[[I0A]], [[I0A]]{{\]}}
     %v0 = load %0[%x1_0, %x1_1] : memref<4x4xf32>
 
-    // Test load[%y, %y]
+    // Test store[%y, %y]
     %y0 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
     %y1_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%y0, %y0)
     %y1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %y0)
 
     // CHECK-NEXT: [[I1A:%[0-9]+]] = affine.apply [[MAP1]](%{{.*}})
-    // CHECK-NEXT: load %0{{\[}}[[I1A]], [[I1A]]{{\]}}
-    %v1 = load %0[%y1_0, %y1_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I1A]], [[I1A]]{{\]}}
+    store %v0, %0[%y1_0, %y1_1] : memref<4x4xf32>
 
-    // Test load[%x, %y]
+    // Test store[%x, %y]
     %xy_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%x0, %y0)
     %xy_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %y0)
 
-    // CHECK-NEXT: load %0{{\[}}[[I0A]], [[I1A]]{{\]}}
-    %v2 = load %0[%xy_0, %xy_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I0A]], [[I1A]]{{\]}}
+    store %v0, %0[%xy_0, %xy_1] : memref<4x4xf32>
 
-    // Test load[%y, %x]
+    // Test store[%y, %x]
     %yx_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%y0, %x0)
     %yx_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %x0)
-    // CHECK-NEXT: load %0{{\[}}[[I1A]], [[I0A]]{{\]}}
-    %v3 = load %0[%yx_0, %yx_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I1A]], [[I0A]]{{\]}}
+    store %v0, %0[%yx_0, %yx_1] : memref<4x4xf32>
   }
   return
 }
@@ -92,29 +92,29 @@ func @compose_affine_maps_1dto2d_with_symbols() {
     %x0 = affine.apply affine_map<(d0)[s0] -> (d0 - s0)> (%i0)[%c4]
 
     // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP4]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I0]]{{\]}}
+    // CHECK-NEXT: [[V0:%[0-9]+]] = load %{{[0-9]+}}{{\[}}[[I0]], [[I0]]{{\]}}
     %v0 = load %0[%x0, %x0] : memref<4x4xf32>
 
     // Test load[%x0, %x1] with symbol %c4 captured by '%x0' map.
     %x1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
     %y1 = affine.apply affine_map<(d0, d1) -> (d0+d1)> (%x0, %x1)
     // CHECK-NEXT: [[I1:%[0-9]+]] = affine.apply [[MAP7]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I1]], [[I1]]{{\]}}
-    %v1 = load %0[%y1, %y1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I1]], [[I1]]{{\]}}
+    store %v0, %0[%y1, %y1] : memref<4x4xf32>
 
-    // Test load[%x1, %x0] with symbol %c4 captured by '%x0' map.
+    // Test store[%x1, %x0] with symbol %c4 captured by '%x0' map.
     %y2 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x1, %x0)
     // CHECK-NEXT: [[I2:%[0-9]+]] = affine.apply [[MAP7]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I2]], [[I2]]{{\]}}
-    %v2 = load %0[%y2, %y2] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I2]], [[I2]]{{\]}}
+    store %v0, %0[%y2, %y2] : memref<4x4xf32>
 
-    // Test load[%x2, %x0] with symbol %c4 from '%x0' and %c5 from '%x2'
+    // Test store[%x2, %x0] with symbol %c4 from '%x0' and %c5 from '%x2'
     %c5 = constant 5 : index
     %x2 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)> (%i0)[%c5]
     %y3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x2, %x0)
     // CHECK: [[I3:%[0-9]+]] = affine.apply [[MAP7a]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I3]], [[I3]]{{\]}}
-    %v3 = load %0[%y3, %y3] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I3]], [[I3]]{{\]}}
+    store %v0, %0[%y3, %y3] : memref<4x4xf32>
   }
   return
 }
@@ -175,15 +175,15 @@ func @compose_affine_maps_dependent_loads() {
         // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP9]](%{{.*}})
         // CHECK: [[I1:%[0-9]+]] = affine.apply [[MAP4b]](%{{.*}})
         // CHECK: [[I2:%[0-9]+]] = affine.apply [[MAP10]](%{{.*}})
-        // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
+        // CHECK-NEXT: [[V0:%[0-9]+]] = load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
         %v0 = load %0[%x00, %x01] : memref<16x32xf32>
 
-        // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I2]]{{\]}}
-        %v1 = load %0[%x00, %x02] : memref<16x32xf32>
+        // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I0]], [[I2]]{{\]}}
+        store %v0, %0[%x00, %x02] : memref<16x32xf32>
 
         // Swizzle %i0, %i1
-        // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I1]], [[I0]]{{\]}}
-        %v2 = load %0[%x01, %x00] : memref<16x32xf32>
+        // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I1]], [[I0]]{{\]}}
+        store %v0, %0[%x01, %x00] : memref<16x32xf32>
 
         // Swizzle %x00, %x01 and %c3, %c7
         %x10 = affine.apply affine_map<(d0, d1)[s0, s1] -> (d0 * s1)>
@@ -193,18 +193,16 @@ func @compose_affine_maps_dependent_loads() {
 
         // CHECK-NEXT: [[I2A:%[0-9]+]] = affine.apply [[MAP12]](%{{.*}})
         // CHECK-NEXT: [[I2B:%[0-9]+]] = affine.apply [[MAP11]](%{{.*}})
-        // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I2A]], [[I2B]]{{\]}}
-        %v3 = load %0[%x10, %x11] : memref<16x32xf32>
+        // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I2A]], [[I2B]]{{\]}}
+        store %v0, %0[%x10, %x11] : memref<16x32xf32>
       }
     }
   }
   return
 }
 
-// CHECK-LABEL: func @compose_affine_maps_diamond_dependency() {
-func @compose_affine_maps_diamond_dependency() {
-  %0 = alloc() : memref<4x4xf32>
-
+// CHECK-LABEL: func @compose_affine_maps_diamond_dependency
+func @compose_affine_maps_diamond_dependency(%arg0: f32, %arg1: memref<4x4xf32>) {
   affine.for %i0 = 0 to 15 {
     %a = affine.apply affine_map<(d0) -> (d0 - 1)> (%i0)
     %b = affine.apply affine_map<(d0) -> (d0 + 7)> (%a)
@@ -213,15 +211,15 @@ func @compose_affine_maps_diamond_dependency() {
     %d1 = affine.apply affine_map<(d0, d1) -> (d1 floordiv 3)> (%b, %c)
     // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP13A]](%{{.*}})
     // CHECK: [[I1:%[0-9]+]] = affine.apply [[MAP13B]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
-    %v = load %0[%d0, %d1] : memref<4x4xf32>
+    // CHECK-NEXT: store %arg0, %arg1{{\[}}[[I0]], [[I1]]{{\]}}
+    store %arg0, %arg1[%d0, %d1] : memref<4x4xf32>
   }
 
   return
 }
 
 // CHECK-LABEL: func @arg_used_as_dim_and_symbol
-func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
+func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index, %arg2: f32) {
   %c9 = constant 9 : index
   %1 = alloc() : memref<100x100xf32, 1>
   %2 = alloc() : memref<1xi32>
@@ -231,8 +229,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
         (%i0, %i1)[%arg1, %c9]
       %4 = affine.apply affine_map<(d0, d1, d3) -> (d3 - (d0 + d1))>
         (%arg1, %c9, %3)
-      // CHECK: load %{{[0-9]+}}{{\[}}%{{.*}}, %{{.*}}{{\]}}
-      %5 = load %1[%4, %arg1] : memref<100x100xf32, 1>
+      // CHECK: store %arg2, %{{[0-9]+}}{{\[}}%{{.*}}, %{{.*}}{{\]}}
+      store %arg2, %1[%4, %arg1] : memref<100x100xf32, 1>
     }
   }
   return
@@ -252,7 +250,7 @@ func @trivial_maps() {
 
     %3 = affine.apply affine_map<()[] -> (0)>()[]
     store %cst, %0[%3] : memref<10xf32>
-    %4 = load %0[%c0] : memref<10xf32>
+    store %2, %0[%c0] : memref<10xf32>
   }
   return
 }

diff --git a/mlir/test/Dialect/GPU/all-reduce-max.mlir b/mlir/test/Dialect/GPU/all-reduce-max.mlir
index 5c94bd4a67da..142228dc276e 100644
--- a/mlir/test/Dialect/GPU/all-reduce-max.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce-max.mlir
@@ -195,7 +195,6 @@ gpu.module @kernels {
     // CHECK:   br ^bb42
     // CHECK: ^bb42:
     // CHECK:   gpu.barrier
-    // CHECK:   [[VAL_134:%.*]] = load [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
     %sum = "gpu.all_reduce"(%arg0) ({}) {op = "max"} : (f32) -> (f32)
     gpu.return
   }

diff --git a/mlir/test/Dialect/GPU/all-reduce.mlir b/mlir/test/Dialect/GPU/all-reduce.mlir
index ff7986340ac4..491d9b32fe91 100644
--- a/mlir/test/Dialect/GPU/all-reduce.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce.mlir
@@ -175,7 +175,6 @@ gpu.module @kernels {
     // CHECK:   br ^bb42
     // CHECK: ^bb42:
     // CHECK:   gpu.barrier
-    // CHECK:   [[VAL_114:%.*]] = load [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
     %sum = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32)
     gpu.return
   }

diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index 3751c105f310..7c71dbf893c9 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -856,7 +856,6 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 //   CHECKLOOP-NOT: loop.for
 //   CHECKLOOP-DAG: load %[[ARG0]][]
 //   CHECKLOOP-DAG: load %[[ARG1]][]
-//   CHECKLOOP-DAG: load %[[ARG2]][]
 //       CHECKLOOP: addf
 //       CHECKLOOP: store %{{.*}}, %[[ARG2]][]
 
@@ -867,6 +866,5 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 //   CHECKPARALLEL-NOT: loop.for
 //   CHECKPARALLEL-DAG: load %[[ARG0]][]
 //   CHECKPARALLEL-DAG: load %[[ARG1]][]
-//   CHECKPARALLEL-DAG: load %[[ARG2]][]
 //       CHECKPARALLEL: addf
 //       CHECKPARALLEL: store %{{.*}}, %[[ARG2]][]

diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir
index 1c7aee614b8b..15a3ed210d9d 100644
--- a/mlir/test/Dialect/Linalg/parallel_loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir
@@ -24,7 +24,6 @@ func @linalg_generic_sum(%lhs: memref<2x2xf32>,
 // CHECK: loop.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
 // CHECK:   %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]]
 // CHECK:   %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]]
-// CHECK:   %[[SUM_ELEM:.*]] = load %[[SUM]][%[[I]], %[[J]]]
 // CHECK:   %[[SUM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32
 // CHECK:   store %[[SUM]], %{{.*}}[%[[I]], %[[J]]]
 // CHECK:   loop.yield
@@ -60,4 +59,4 @@ func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
 //       CHECK:   loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
 //       CHECK:     loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
 //       CHECK:       load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-//       CHECK:       store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]
\ No newline at end of file
+//       CHECK:       store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]

diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 2524d1c7cbad..6528d10ad5cf 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -56,6 +56,16 @@ func @trivial_dce(%arg0: tensor<8x4xf32>) {
   return
 }
 
+// CHECK-LABEL: func @load_dce
+func @load_dce(%arg0: index) {
+  %c4 = constant 4 : index
+  %a = alloc(%c4) : memref<?xf32>
+  %2 = load %a[%arg0] : memref<?xf32>
+  dealloc %a: memref<?xf32>
+  // CHECK-NEXT: return
+  return
+}
+
 // CHECK-LABEL: func @addi_zero
 func @addi_zero(%arg0: i32) -> i32 {
   // CHECK-NEXT: return %arg0
@@ -648,7 +658,7 @@ func @cast_values(%arg0: tensor<*xi32>, %arg1: memref<?xi32>) -> (tensor<2xi32>,
 // CHECK-DAG: #[[VIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 7 + d1)>
 
 // CHECK-LABEL: func @view
-func @view(%arg0 : index) {
+func @view(%arg0 : index) -> (f32, f32, f32, f32, f32, f32) {
   // CHECK: %[[ALLOC_MEM:.*]] = alloc() : memref<2048xi8>
   %0 = alloc() : memref<2048xi8>
   %c0 = constant 0 : index
@@ -660,41 +670,41 @@ func @view(%arg0 : index) {
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP0]]>
   %1 = view %0[%c15][%c7, %c11]
     : memref<2048xi8> to memref<?x?xf32, #TEST_VIEW_MAP0>
-  load %1[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r0 = load %1[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
 
   // Test: fold constant sizes but not offset, update map with static stride.
   // Test that we do not a fold dynamic dim which is not produced by a constant.
   // CHECK: std.view %[[ALLOC_MEM]][%arg0][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP1]]>
   %2 = view %0[%arg0][%c7, %c11]
     : memref<2048xi8> to memref<?x?xf32, #TEST_VIEW_MAP0>
-  load %2[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r1 = load %2[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
 
   // Test: fold constant offset but not sizes, update map with constant offset.
   // Test that we fold constant offset but not dynamic dims.
   // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref<?x?xf32, #[[VIEW_MAP2]]>
   %3 = view %0[%c15][%arg0, %arg0]
     : memref<2048xi8> to memref<?x?xf32,  #TEST_VIEW_MAP0>
-  load %3[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r2 = load %3[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
 
   // Test: fold one constant dim, no offset, should update with constant
   // stride on dim 1, but leave dynamic stride on dim 0.
   // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref<?x?x7xf32, #[[VIEW_MAP3]]>
   %4 = view %0[][%arg0, %arg0, %c7]
     : memref<2048xi8> to memref<?x?x?xf32, #TEST_VIEW_MAP1>
-  load %4[%c0, %c0, %c0] : memref<?x?x?xf32, #TEST_VIEW_MAP1>
+  %r3 = load %4[%c0, %c0, %c0] : memref<?x?x?xf32, #TEST_VIEW_MAP1>
 
   // Test: preserve an existing static dim size while folding a dynamic
   // dimension and offset.
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<7x4xf32, #[[VIEW_MAP4]]>
   %5 = view %0[%c15][%c7] : memref<2048xi8> to memref<?x4xf32, #TEST_VIEW_MAP2>
-  load %5[%c0, %c0] : memref<?x4xf32, #TEST_VIEW_MAP2>
+  %r4 = load %5[%c0, %c0] : memref<?x4xf32, #TEST_VIEW_MAP2>
 
   // Test: folding static alloc and memref_cast into a view.
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<15x7xf32, #[[VIEW_MAP5]]>
   %6 = memref_cast %0 : memref<2048xi8> to memref<?xi8>
   %7 = view %6[%c15][%c7] : memref<?xi8> to memref<?x?xf32>
-  load %7[%c0, %c0] : memref<?x?xf32>
-  return
+  %r5 = load %7[%c0, %c0] : memref<?x?xf32>
+  return %r0, %r1, %r2, %r3, %r4, %r5 : f32, f32, f32, f32, f32, f32
 }
 
 // -----
@@ -735,7 +745,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %1[%c0, %c0, %c0] : memref<?x?x?xf32,
+  %v0 = load %1[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
 
   // Test: subview with one dynamic operand should not be folded.
@@ -744,7 +754,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %2[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %2[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
 
   // CHECK: %[[ALLOC1:.*]] = alloc(%[[ARG0]])
@@ -755,7 +765,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<?x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %4[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %4[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
 
   // Test: subview offset operands are folded correctly w.r.t. base strides.
@@ -764,7 +774,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %5[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %5[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
 
   // Test: subview stride operands are folded correctly w.r.t. base strides.
@@ -773,40 +783,40 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %6[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %6[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
 
   // Test: subview shape are folded, but offsets and strides are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<7x11x2xf32, #[[SUBVIEW_MAP3]]>
   %10 = subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %10[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %10[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // Test: subview strides are folded, but offsets and shape are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<?x?x?xf32, #[[SUBVIEW_MAP4]]
   %11 = subview %0[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c7, %c11] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %11[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %11[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // Test: subview offsets are folded, but strides and shape are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<?x?x?xf32, #[[SUBVIEW_MAP5]]
   %13 = subview %0[%c1, %c2, %c7] [%arg1, %arg1, %arg1] [%arg0, %arg0, %arg0] :  memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %13[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %13[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // CHECK: %[[ALLOC2:.*]] = alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]])
   %14 = alloc(%arg0, %arg0, %arg1) : memref<?x?x?xf32>
   // Test: subview shape are folded, even if base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<?x?x?xf32> to memref<7x11x2xf32, #[[SUBVIEW_MAP3]]>
   %15 = subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // TEST: subview strides are not folded when the base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[C2]], %[[C2]], %[[C2]]] : memref<?x?x?xf32> to memref<?x?x?xf32, #[[SUBVIEW_MAP3]]
   %16 = subview %14[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c2, %c2] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // TEST: subview offsets are not folded when the base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[C1]], %[[C1]], %[[C1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<?x?x?xf32> to memref<?x?x?xf32, #[[SUBVIEW_MAP3]]
   %17 = subview %14[%c1, %c1, %c1] [%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
 
   // CHECK: %[[ALLOC3:.*]] = alloc() : memref<12x4xf32>
   %18 = alloc() : memref<12x4xf32>
@@ -815,12 +825,12 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
   // TEST: subview strides are maintained when sizes are folded
   // CHECK: subview %[[ALLOC3]][%arg1, %arg1] [] [] : memref<12x4xf32> to memref<2x4xf32, #[[SUBVIEW_MAP6]]>
   %19 = subview %18[%arg1, %arg1] [%c2, %c4] [] : memref<12x4xf32> to memref<?x?xf32, offset: ?, strides:[4, 1]>
-  load %19[%arg1, %arg1] : memref<?x?xf32, offset: ?, strides:[4, 1]>
+  store %v0, %19[%arg1, %arg1] : memref<?x?xf32, offset: ?, strides:[4, 1]>
 
   // TEST: subview strides and sizes are maintained when offsets are folded
   // CHECK: subview %[[ALLOC3]][] [] [] : memref<12x4xf32> to memref<12x4xf32, #[[SUBVIEW_MAP7]]>
   %20 = subview %18[%c2, %c4] [] [] : memref<12x4xf32> to memref<12x4xf32, offset: ?, strides:[4, 1]>
-  load %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]>
+  store %v0, %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]>
 
   // Test: dim on subview is rewritten to size operand.
   %7 = dim %4, 0 : memref<?x?x?xf32,

diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 2d6c91f31817..ad9c070c4530 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -1286,9 +1286,11 @@ void OpEmitter::genSideEffectInterfaceMethods() {
   auto resolveDecorators = [&](Operator::var_decorator_range decorators,
                                unsigned index, unsigned kind) {
     for (auto decorator : decorators)
-      if (SideEffect *effect = dyn_cast<SideEffect>(&decorator))
+      if (SideEffect *effect = dyn_cast<SideEffect>(&decorator)) {
+        opClass.addTrait(effect->getInterfaceTrait());
         interfaceEffects[effect->getBaseEffectName()].push_back(
             EffectLocation{*effect, index, kind});
+      }
   };
 
   // Collect effects that were specified via:
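
With this change, resolveDecorators does two things per effect decorator:
it registers the owning interface trait on the op class (addTrait
deduplicates, so an op with several decorated operands gains the trait
once) and it records the effect under its base effect name for the
generated effect-query body. A toy model of that control flow, with
illustrative effect names:

#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct EffectLocation {
  std::string effect; // e.g. "MemRead" or "MemWrite"
  unsigned index;
};

struct Decorator {
  std::string effect, baseEffect, interfaceTrait;
};

int main() {
  // Two decorated operands on one op, both belonging to the same interface.
  std::vector<Decorator> decorators = {
      {"MemRead", "MemoryEffects::Effect", "MemoryEffectOpInterface::Trait"},
      {"MemWrite", "MemoryEffects::Effect", "MemoryEffectOpInterface::Trait"}};

  std::vector<std::string> opTraits; // stands in for opClass.addTrait
  std::map<std::string, std::vector<EffectLocation>> interfaceEffects;

  unsigned index = 0;
  for (const auto &d : decorators) {
    // addTrait-style dedup: the trait enters the base-class list once.
    if (std::find(opTraits.begin(), opTraits.end(), d.interfaceTrait) ==
        opTraits.end())
      opTraits.push_back(d.interfaceTrait);
    interfaceEffects[d.baseEffect].push_back({d.effect, index++});
  }

  std::cout << opTraits.size() << " trait(s), "
            << interfaceEffects["MemoryEffects::Effect"].size()
            << " effect(s)\n"; // prints "1 trait(s), 2 effect(s)"
}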


        

