[Mlir-commits] [mlir] [mlir] [linalg] Check for dim shape to decide unit dim for each operand in dropUnitDims pass. (PR #91673)
Sayan Saha
llvmlistbot at llvm.org
Wed May 15 04:47:18 PDT 2024
https://github.com/sahas3 updated https://github.com/llvm/llvm-project/pull/91673
From 9f2d5017271ff7148d405cd34ce8672b4e2e0534 Mon Sep 17 00:00:00 2001
From: Sayan Saha <sayans at mathworks.com>
Date: Thu, 9 May 2024 16:56:30 -0400
Subject: [PATCH 1/4] [BugFix] : Check dim shape in isUnitDim for each operand
in dropUnitDims.
---
.../Linalg/Transforms/DropUnitDims.cpp | 3 +-
.../Dialect/Linalg/drop-unit-extent-dims.mlir | 44 +++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
index 65efa18af18f6..c0829397f1f85 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -351,7 +351,8 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(
auto isUnitDim = [&](unsigned dim) {
if (auto dimExpr = dyn_cast<AffineDimExpr>(exprs[dim])) {
unsigned oldPosition = dimExpr.getPosition();
- return !oldDimsToNewDimsMap.count(oldPosition);
+ return !oldDimsToNewDimsMap.count(oldPosition) &&
+ (operandShape[dim] == 1);
}
// Handle the other case where the shape is 1, and is accessed using a
// constant 0.
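
To make the intent of the DropUnitDims.cpp hunk above concrete: a loop dimension that is absent from oldDimsToNewDimsMap no longer marks the corresponding operand dimension as a unit dim on its own; the operand's own static extent at that position must also be 1. Below is a minimal standalone sketch of that decision in plain C++ with standard containers only (not the MLIR API; the shape and dropped dims are illustrative, loosely modeled on the test added next):

#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

// Dynamic extents ('?') are modeled as -1, mirroring how ShapedType prints them.
constexpr int64_t kDynamic = -1;

// An operand dimension is collapsible as a unit dim only if (a) the loop dim
// indexing it was dropped from the old->new dim map AND (b) the operand's own
// static extent at that position is 1. Condition (b) is what the fix adds.
static bool isUnitDim(int64_t extent, unsigned loopDim,
                      const std::set<unsigned> &droppedLoopDims) {
  return droppedLoopDims.count(loopDim) != 0 && extent == 1;
}

int main() {
  // Illustrative operand tensor<?x1x61x1xf32> indexed by loop dims 0..3, with
  // loop dims 0, 1 and 3 dropped. Before the fix, dim 0 (extent '?') would
  // also have been reported as a unit dim; now only extents equal to 1 are.
  std::vector<int64_t> shape = {kDynamic, 1, 61, 1};
  std::set<unsigned> dropped = {0, 1, 3};
  for (size_t d = 0; d < shape.size(); ++d)
    std::cout << "dim " << d << " unit? "
              << (isUnitDim(shape[d], static_cast<unsigned>(d), dropped) ? "yes" : "no")
              << "\n";
  return 0;
}
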
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index a9cbaaf7fdc48..d31ceb5fd719f 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1087,3 +1087,47 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te
// CHECK: } : tensor<383x128xf32> to tensor<384x128xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32>
+
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)>
+// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()>
+
+// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim
+// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>,
+// CHECK-SAME: %[[ARG1:.*]]: index) -> tensor<?x1x61x1xf32> {
+// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor<f32>
+// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
+// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor<?x61xf32>
+// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]]
+// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor<?x61xf32>
+// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%[[VAL_6]] : tensor<?x61xf32>) {
+// CHECK: ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
+// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32
+// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32
+// CHECK: linalg.yield %[[VAL_13]] : f32
+// CHECK: } -> tensor<?x61xf32>
+// CHECK: %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape {{\[}}%[[VAL_0]], 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
+// CHECK: return %[[VAL_14]] : tensor<?x1x61x1xf32>
+// CHECK: }
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
+module {
+ func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
+ %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
+ %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
+ %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %2 = arith.mulf %in, %in_0 : f32
+ %3 = arith.addf %out, %2 : f32
+ linalg.yield %3 : f32
+ } -> tensor<?x1x61x1xf32>
+ return %1 : tensor<?x1x61x1xf32>
+ }
+}
From 84164741da23ec8560f2ed80da55dc0b18f0c6d5 Mon Sep 17 00:00:00 2001
From: Sayan Saha <sayans at mathworks.com>
Date: Tue, 14 May 2024 11:02:09 -0400
Subject: [PATCH 2/4] [BugFix] : Relax projectedPermutation check + add
GenericOp verifier.
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 52 ++++++++--
.../Linalg/Transforms/DropUnitDims.cpp | 3 +-
mlir/test/Dialect/Linalg/canonicalize.mlir | 96 +++++++------------
.../Dialect/Linalg/drop-unit-extent-dims.mlir | 44 ---------
4 files changed, 80 insertions(+), 115 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 6a5f25a7605f1..b7890a4cc1377 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1147,7 +1147,51 @@ void GenericOp::getEffects(
getDpsInits());
}
-LogicalResult GenericOp::verify() { return success(); }
+static LogicalResult commonOpVerifier(LinalgOp linalgOp) {
+ llvm::DenseMap<AffineExpr, int64_t> affineExprToSize;
+
+ for (OpOperand &opOperand : linalgOp->getOpOperands()) {
+ if (linalgOp.isScalar(&opOperand))
+ continue;
+
+ auto sourceMap = linalgOp.getMatchingIndexingMap(&opOperand);
+
+ Value source = opOperand.get();
+ if (auto sourceType = llvm::cast<ShapedType>(source.getType())) {
+ auto sourceShape = sourceType.getShape();
+ bool isInputOperand = linalgOp.isDpsInput(&opOperand);
+
+ for (unsigned i = 0; i < sourceShape.size(); i++) {
+ int64_t dimShape = sourceShape[i];
+ if (auto affineDimExpr =
+ dyn_cast<AffineDimExpr>(sourceMap.getResult(i))) {
+ if (isInputOperand) {
+ if (!affineExprToSize.contains(affineDimExpr) &&
+ !sourceType.isDynamicDim(i))
+ // For input operands populate affineExprToSize to hold onto the
+ // shape for an affineDimExpr if the shape isn't populated
+ // already. For dynamic shape overwrite the shape content (if
+ // needed), this accounts for broadcasting where one input can
+ // have fixed shape but the others can have dynamic shape for the
+ // same dimension
+ affineExprToSize.try_emplace(affineDimExpr, dimShape);
+ } else if (affineExprToSize.contains(affineDimExpr)
+ && affineExprToSize[affineDimExpr] != dimShape)
+ // If shape for an affineDimExpr is already known from the input
+ // operand's map ensure that the shapes match across the output
+ // operands.
+ return linalgOp->emitError("mismatch in dimension shape ")
+ << affineDimExpr.getPosition() << " inferred from the maps.";
+ }
+ }
+ }
+ }
+ return success();
+}
+
+LogicalResult GenericOp::verify() {
+ return commonOpVerifier(*this);
+}
namespace {
@@ -2347,12 +2391,6 @@ struct InferStaticShapeOfOperands : public OpInterfaceRewritePattern<LinalgOp> {
if (!linalgOp.hasPureTensorSemantics())
return failure();
- // Maps must be projected permutations.
- if (llvm::any_of(linalgOp.getIndexingMapsArray(), [](AffineMap map) {
- return !map.isProjectedPermutation();
- }))
- return failure();
-
// Maps affine dim expressions to the static size of that dimension.
llvm::DenseMap<AffineExpr, int64_t> affineExprToSize;
Location loc = linalgOp.getLoc();
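
The first hunk above adds commonOpVerifier, whose only state is a map from each AffineDimExpr to the static size seen for it on an input operand; output operands are then checked against that record. The second hunk drops the projected-permutation guard from InferStaticShapeOfOperands, so convolution-style maps are no longer excluded. A self-contained toy of the verifier's bookkeeping, in plain C++ (not the MLIR API; dim expressions are reduced to integer loop-dim positions and '?' to -1):

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

constexpr int64_t kDynamic = -1;

struct Operand {
  std::vector<int64_t> shape;   // static extents, kDynamic for '?'
  std::vector<unsigned> dimPos; // loop-dim position indexing each shape entry
  bool isInput;
};

// Mirrors the hunk above: record the first static size seen for a loop dim on
// an input (dynamic input extents are skipped, so mixing static and dynamic
// inputs for the same dim stays legal), then reject an output whose extent
// disagrees with a recorded input size.
static bool verifyShapes(const std::vector<Operand> &operands) {
  std::unordered_map<unsigned, int64_t> dimToSize;
  for (const Operand &op : operands) {
    for (size_t i = 0; i < op.shape.size(); ++i) {
      int64_t extent = op.shape[i];
      unsigned pos = op.dimPos[i];
      if (op.isInput) {
        if (extent != kDynamic)
          dimToSize.try_emplace(pos, extent);
      } else if (dimToSize.count(pos) && dimToSize[pos] != extent) {
        std::cerr << "mismatch in dimension " << pos << " inferred from the maps\n";
        return false;
      }
    }
  }
  return true;
}

int main() {
  // ins tensor<2x3xf32>, tensor<?x3xf32>; outs tensor<2x3xf32>: verifies.
  std::vector<Operand> ok = {{{2, 3}, {0, 1}, true},
                             {{kDynamic, 3}, {0, 1}, true},
                             {{2, 3}, {0, 1}, false}};
  // ins tensor<2x3xf32>; outs tensor<2x4xf32>: d1 is 3 on the input, 4 on the output.
  std::vector<Operand> bad = {{{2, 3}, {0, 1}, true},
                              {{2, 4}, {0, 1}, false}};
  std::cout << "ok verifies:  " << verifyShapes(ok) << "\n";  // 1
  std::cout << "bad verifies: " << verifyShapes(bad) << "\n"; // 0
}
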
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
index c0829397f1f85..65efa18af18f6 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -351,8 +351,7 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(
auto isUnitDim = [&](unsigned dim) {
if (auto dimExpr = dyn_cast<AffineDimExpr>(exprs[dim])) {
unsigned oldPosition = dimExpr.getPosition();
- return !oldDimsToNewDimsMap.count(oldPosition) &&
- (operandShape[dim] == 1);
+ return !oldDimsToNewDimsMap.count(oldPosition);
}
// Handle the other case where the shape is 1, and is accessed using a
// constant 0.
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 19cea6c2066c9..52e311ddbae5a 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -497,30 +497,23 @@ func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
// Tests below verify whether static information is propagated through all the operands of generic op.
// 1. If one of the inputs of generic op has static info and it has no cast source.
-// 2. If one of the inputs of generic op has static info and it is coming from tensr.cast operation.
-// 3. If one of the outputs of generic op has static info and it is coming from tenso.cast operation.
+// 2. If one of the inputs of generic op has static info and it is coming from tensor.cast operation.
+// 3. If one of the outputs of generic op has static info and it is coming from tensor.cast operation.
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @static_input_without_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c2 = arith.constant 2 : index
- %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
- %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
- %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
- %4 = linalg.generic {
+ %0 = tensor.empty() : tensor<2x3x4xf32>
+ %1 = linalg.generic {
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor<?x?x?xf32>)
- outs(%3 : tensor<?x?x?xf32>) {
+ outs(%0 : tensor<2x3x4xf32>) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
- } -> (tensor<?x?x?xf32>)
- %5 = tensor.cast %4 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
- return %5 : tensor<2x3x4xf32>
+ } -> (tensor<2x3x4xf32>)
+ return %1 : tensor<2x3x4xf32>
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
@@ -533,25 +526,18 @@ func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x
// CHECK-LABEL: func @static_input_with_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c2 = arith.constant 2 : index
- %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
- %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
- %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
- %4 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
- %5 = linalg.generic {
+ %0 = tensor.empty() : tensor<2x3x4xf32>
+ %1 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
+ %2 = linalg.generic {
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
- } ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor<?x?x?xf32>) {
+ } ins(%arg0, %1 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
+ outs(%0 : tensor<2x3x4xf32>) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
- } -> (tensor<?x?x?xf32>)
- %6 = tensor.cast %5 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
- return %6: tensor<2x3x4xf32>
+ } -> (tensor<2x3x4xf32>)
+ return %2: tensor<2x3x4xf32>
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
@@ -598,53 +584,39 @@ func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x
// CHECK-LABEL: func @cast_source
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c2 = arith.constant 2 : index
- %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
- %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
- %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
- %4 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
- %5 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
- %6 = linalg.generic {
+ %0 = tensor.empty() : tensor<2x3x4xf32>
+ %1 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
+ %2 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
+ %3 = linalg.generic {
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
- } ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor<?x?x?xf32>) {
+ } ins(%1, %2 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
+ outs(%0 : tensor<2x3x4xf32>) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
- } -> (tensor<?x?x?xf32>)
- %7 = tensor.cast %6 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
- return %7: tensor<2x3x4xf32>
+ } -> (tensor<2x3x4xf32>)
+ return %3: tensor<2x3x4xf32>
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
// CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
}
+
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @cast_dest
-// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
-func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
- %0 = tensor.empty(%arg2, %arg3, %arg4) : tensor<?x?x?xf32>
- %1 = tensor.cast %arg1 : tensor<1x?x?xf32> to tensor<?x?x?xf32>
- %2 = linalg.generic {
- indexing_maps = [#map, #map, #map],
- iterator_types = ["parallel", "parallel", "parallel"]
- } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<1x?x?xf32>)
- outs(%0 : tensor<?x?x?xf32>) {
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
- %3 = arith.subf %arg5, %arg6 : f32
- linalg.yield %3 : f32
- } -> tensor<?x?x?xf32>
- return %2 : tensor<?x?x?xf32>
-// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
-// CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
-// CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>)
-// CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor<?x?x?xf32>
+// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>, %[[ARG1:.*]]: index
+func.func @cast_dest(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
+ %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
+ %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
+%1 = linalg.conv_2d_nhwc_hwcf ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) -> tensor<?x1x61x1xf32>
+return %1 : tensor<?x1x61x1xf32>
+// CHECK: %[[CONV_OP:.*]] = linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME: ins(%[[ARG0]], %{{.*}} : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>)
+// CHECK-SAME: outs(%{{.*}} : tensor<1x1x61x1xf32>)
+// CHECK: tensor.cast %[[CONV_OP]] : tensor<1x1x61x1xf32> to tensor<?x1x61x1xf32>
}
// -----
@@ -1017,7 +989,7 @@ func.func @broadcast_same_shape(%input: tensor<2x3xf32>, %init: tensor<2x3xf32>)
return %0 : tensor<2x3xf32>
}
-// ----
+// -----
func.func @transpose_1d(%input: tensor<16xf32>,
%init: tensor<16xf32>) -> tensor<16xf32> {
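
The test updates above exercise InferStaticShapeOfOperands, which collects the best known static size for each loop dimension across all operands and then tensor.cast's the remaining operands (and the result) to the refined types. A toy of that join, in plain C++ (identity indexing maps assumed for brevity; the real pattern keys sizes by AffineDimExpr rather than by position, and this is a sketch rather than the pattern itself):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

constexpr int64_t kDynamic = -1; // '?' extents

// Given operand shapes that all index the same loop dims in the same order,
// return the most static shape: a static extent from any operand refines a
// dynamic one elsewhere.
static std::vector<int64_t>
joinStaticShape(const std::vector<std::vector<int64_t>> &shapes) {
  std::vector<int64_t> joined(shapes.front().size(), kDynamic);
  for (const auto &shape : shapes)
    for (size_t d = 0; d < shape.size(); ++d)
      if (shape[d] != kDynamic)
        joined[d] = shape[d];
  return joined;
}

int main() {
  // Loosely mirrors @static_input_without_cast: ins tensor<2x3x4xf32> and
  // tensor<?x?x?xf32>, outs tensor<2x3x4xf32>. The dynamic input would be
  // tensor.cast to tensor<2x3x4xf32> before rebuilding the generic op.
  auto joined = joinStaticShape(
      {{2, 3, 4}, {kDynamic, kDynamic, kDynamic}, {2, 3, 4}});
  for (int64_t e : joined)
    std::cout << (e == kDynamic ? std::string("?") : std::to_string(e)) << " ";
  std::cout << "\n"; // prints: 2 3 4
}
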
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index d31ceb5fd719f..a9cbaaf7fdc48 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1087,47 +1087,3 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te
// CHECK: } : tensor<383x128xf32> to tensor<384x128xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32>
-
-
-// -----
-
-// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
-// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)>
-// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()>
-
-// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim
-// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>,
-// CHECK-SAME: %[[ARG1:.*]]: index) -> tensor<?x1x61x1xf32> {
-// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor<f32>
-// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
-// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor<?x61xf32>
-// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]]
-// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor<?x61xf32>
-// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%[[VAL_6]] : tensor<?x61xf32>) {
-// CHECK: ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32
-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32
-// CHECK: linalg.yield %[[VAL_13]] : f32
-// CHECK: } -> tensor<?x61xf32>
-// CHECK: %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape {{\[}}%[[VAL_0]], 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
-// CHECK: return %[[VAL_14]] : tensor<?x1x61x1xf32>
-// CHECK: }
-
-#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
-#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
-#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
-module {
- func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
- %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
- %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
- %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %2 = arith.mulf %in, %in_0 : f32
- %3 = arith.addf %out, %2 : f32
- linalg.yield %3 : f32
- } -> tensor<?x1x61x1xf32>
- return %1 : tensor<?x1x61x1xf32>
- }
-}
From 87e4804661eae6a806b0e504d539b826dac2e81d Mon Sep 17 00:00:00 2001
From: Sayan Saha <sayans at mathworks.com>
Date: Tue, 14 May 2024 21:29:47 -0400
Subject: [PATCH 3/4] [Task] : Add verifier for genericOp.
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 7 +++--
mlir/test/Dialect/Linalg/canonicalize.mlir | 31 +++++++++++++---------
2 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index b7890a4cc1377..283c81a9e88bc 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1170,10 +1170,9 @@ static LogicalResult commonOpVerifier(LinalgOp linalgOp) {
!sourceType.isDynamicDim(i))
// For input operands populate affineExprToSize to hold onto the
// shape for an affineDimExpr if the shape isn't populated
- // already. For dynamic shape overwrite the shape content (if
- // needed), this accounts for broadcasting where one input can
- // have fixed shape but the others can have dynamic shape for the
- // same dimension
+ // already. Skip dynamic shapes to account for broadcasting where
+ // one input can have fixed shape but the others can have dynamic
+ // shape for the same dimension
affineExprToSize.try_emplace(affineDimExpr, dimShape);
} else if (affineExprToSize.contains(affineDimExpr)
&& affineExprToSize[affineDimExpr] != dimShape)
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 52e311ddbae5a..137c0784bc1e3 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -605,19 +605,22 @@ func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> t
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @cast_dest
-// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>, %[[ARG1:.*]]: index
-func.func @cast_dest(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
- %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
+// CHECK-SAME: %[[ARG0:.*]]: tensor<1x1x61x1xf32>
+func.func @cast_dest(%arg0: tensor<1x1x61x1xf32>) -> tensor<?x1x61x1xf32> {
+ %c0 = arith.constant 0 : index
+ %dim = tensor.dim %arg0, %c0 : tensor<1x1x61x1xf32>
+ %0 = tensor.empty(%dim) : tensor<?x1x61x1xf32>
+ %inserted_slice = tensor.insert_slice %arg0 into %0[0, 0, 0, 0] [1, 1, 61, 1] [1, 1, 1, 1] : tensor<1x1x61x1xf32> into tensor<?x1x61x1xf32>
%cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
-%1 = linalg.conv_2d_nhwc_hwcf ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) -> tensor<?x1x61x1xf32>
-return %1 : tensor<?x1x61x1xf32>
-// CHECK: %[[CONV_OP:.*]] = linalg.conv_2d_nhwc_hwcf
-// CHECK-SAME: ins(%[[ARG0]], %{{.*}} : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>)
-// CHECK-SAME: outs(%{{.*}} : tensor<1x1x61x1xf32>)
-// CHECK: tensor.cast %[[CONV_OP]] : tensor<1x1x61x1xf32> to tensor<?x1x61x1xf32>
-}
+ %1 = linalg.conv_2d_nhwc_hwcf ins(%inserted_slice, %cst : tensor<?x1x61x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) -> tensor<?x1x61x1xf32>
+ return %1 : tensor<?x1x61x1xf32>
+ // CHECK: %[[VAL_1:.*]] = tensor.empty() : tensor<1x1x61x1xf32>
+ // CHECK: %[[CONV_OUT:.*]] = linalg.conv_2d_nhwc_hwcf
+ // CHECK-SAME: ins(%[[ARG0]], %{{.*}} : tensor<1x1x61x1xf32>, tensor<1x1x1x1xf32>)
+ // CHECK-SAME: outs(%[[VAL_1]] : tensor<1x1x61x1xf32>)
+ // CHECK: tensor.cast %[[CONV_OUT]] : tensor<1x1x61x1xf32> to tensor<?x1x61x1xf32>
+ }
// -----
@@ -817,10 +820,12 @@ func.func @fold_conv_op_with_cast_consumer(%arg0 : tensor<?x?x?x?xf32>,
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>)
// CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor<?x?x?x?xf32> to tensor<4x8x12x16xf32>
+// CHECK: %[[IN1_CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?x?xf32> to tensor<4x?x?x?xf32>
+// CHECK: %[[IN2_CAST:.+]] = tensor.cast %[[ARG1]] : tensor<?x?x?x?xf32> to tensor<8x?x?x?xf32>
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nchw_fchw
-// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
+// CHECK-SAME: ins(%[[IN1_CAST]], %[[IN2_CAST]] :
// CHECK-SAME: outs(%[[OUT_CAST]] :
-// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]]
+// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]] : tensor<4x8x12x16xf32> to tensor<?x?x?x?xf32>
// CHECK: return %[[CONV]], %[[RESULT_CAST]]
// -----
From 7ffc03142528f23a8b0dbdadd82497f5f7eaa22a Mon Sep 17 00:00:00 2001
From: Sayan Saha <sayans at mathworks.com>
Date: Tue, 14 May 2024 21:30:58 -0400
Subject: [PATCH 4/4] [Task] : Run clang-format.
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 283c81a9e88bc..1963c222f3f34 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1157,7 +1157,7 @@ static LogicalResult commonOpVerifier(LinalgOp linalgOp) {
auto sourceMap = linalgOp.getMatchingIndexingMap(&opOperand);
Value source = opOperand.get();
- if (auto sourceType = llvm::cast<ShapedType>(source.getType())) {
+ if (auto sourceType = dyn_cast<ShapedType>(source.getType())) {
auto sourceShape = sourceType.getShape();
bool isInputOperand = linalgOp.isDpsInput(&opOperand);
@@ -1174,8 +1174,8 @@ static LogicalResult commonOpVerifier(LinalgOp linalgOp) {
// one input can have fixed shape but the others can have dynamic
// shape for the same dimension
affineExprToSize.try_emplace(affineDimExpr, dimShape);
- } else if (affineExprToSize.contains(affineDimExpr)
- && affineExprToSize[affineDimExpr] != dimShape)
+ } else if (affineExprToSize.contains(affineDimExpr) &&
+ affineExprToSize[affineDimExpr] != dimShape)
// If shape for an affineDimExpr is already known from the input
// operand's map ensure that the shapes match across the output
// operands.
@@ -1188,9 +1188,7 @@ static LogicalResult commonOpVerifier(LinalgOp linalgOp) {
return success();
}
-LogicalResult GenericOp::verify() {
- return commonOpVerifier(*this);
-}
+LogicalResult GenericOp::verify() { return commonOpVerifier(*this); }
namespace {