[Mlir-commits] [mlir] [mlir][linalg] fix specialization of transposed matmul variants (PR #181387)

Fri Feb 13 09:51:17 PST 2026

================
@@ -331,3 +398,107 @@ func.func @op_matvec(%A: tensor<?x?xf32>, %B: tensor<?xf32>, %Out: tensor<?xf32>
 }
 // CHECK-LABEL: op_matvec
 // CHECK: linalg.generic
+
+// -----
+
+// Matmul transpose A: A is accessed as (k, m) instead of (m, k)
+#mapTA0 = affine_map<(d0, d1, d2) -> (d2, d0)>
+#mapTA1 = affine_map<(d0, d1, d2) -> (d2, d1)>
+#mapTA2 = affine_map<(d0, d1, d2) -> (d0, d1)>
+func.func @op_matmul_transpose_a(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %0 = linalg.generic
+         {indexing_maps = [#mapTA0, #mapTA1, #mapTA2], iterator_types = ["parallel", "parallel", "reduction"]}
+         ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
+   ^bb0(%in: f32, %in_0: f32, %out: f32):
+     %1 = arith.mulf %in, %in_0 : f32
+     %2 = arith.addf %out, %1 : f32
+     linalg.yield %2 : f32
+   } -> tensor<?x?xf32>
+   return %0 : tensor<?x?xf32>
+}
+
+// CHECK-DAG: #[[$MAP_TA_A:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
+// CHECK-DAG: #[[$MAP_TA_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$MAP_TA_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CHECK-LABEL: op_matmul_transpose_a
+// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.matmul indexing_maps = [#[[$MAP_TA_A]], #[[$MAP_TA_B]], #[[$MAP_TA_C]]] ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
----------------
adam-smnk wrote:

nit: split into multiple checks with `CHECK-SAME` for shorter lines to improve readibility 

https://github.com/llvm/llvm-project/pull/181387