[Mlir-commits] [mlir] [MLIR][Linalg] Remove elemwise_unary and elemwise_binary (PR #147082)

Renato Golin llvmlistbot at llvm.org
Sat Jul 5 10:44:27 PDT 2025


https://github.com/rengolin updated https://github.com/llvm/llvm-project/pull/147082

From dbf79307c54816529b59fc037a61b0d63f212921 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Fri, 4 Jul 2025 16:30:22 +0100
Subject: [PATCH 1/6] Remove old elemwise operations

---
 .../Linalg/IR/LinalgNamedStructuredOps.yaml   | 114 ------------------
 .../linalg/opdsl/ops/core_named_ops.py        |  31 -----
 2 files changed, 145 deletions(-)
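
These two ops are dropped in favor of the dedicated named ops
(linalg.exp, linalg.add, ...) and linalg.elementwise. For readers
following along, a rough sketch of what linalg.elemwise_unary meant,
distilled from the YAML definition deleted below and the existing
generalize tests (value names and the 4x8 shape are placeholders; the
exact generalized form may differ in detail):

  // Named form (fun defaults to exp, cast to cast_signed):
  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
         ins(%in : tensor<4x8xf32>) outs(%out : tensor<4x8xf32>) -> tensor<4x8xf32>

  // Generalized form: an identity-mapped, all-parallel linalg.generic.
  %1 = linalg.generic
         {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                           affine_map<(d0, d1) -> (d0, d1)>],
          iterator_types = ["parallel", "parallel"]}
         ins(%in : tensor<4x8xf32>) outs(%out : tensor<4x8xf32>) {
       ^bb0(%i: f32, %o: f32):
         %e = math.exp %i : f32
         linalg.yield %e : f32
       } -> tensor<4x8xf32>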

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
index 6344861c53ac5..3637147c5a90d 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
@@ -44,56 +44,6 @@ structured_op: !LinalgStructuredOpConfig
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
-metadata: !LinalgOpMetadata
-  name: elemwise_unary
-  cpp_class_name: ElemwiseUnaryOp
-  doc: |-
-    Applies the unary function fun elementwise.
-
-    Numeric casting is performed on the input operand, promoting it to the same
-    data type as the accumulator/output.
-structured_op: !LinalgStructuredOpConfig
-  args:
-  - !LinalgOperandDefConfig
-    name: I
-    kind: input_tensor
-    type_var: T1
-    shape_map: affine_map<() -> ()>
-  - !LinalgOperandDefConfig
-    name: O
-    kind: output_tensor
-    type_var: U
-    shape_map: affine_map<() -> ()>
-  - !LinalgOperandDefConfig
-    name: fun
-    kind: unary_fn_attr
-    default_fn: exp
-  - !LinalgOperandDefConfig
-    name: cast
-    kind: type_fn_attr
-    default_fn: cast_signed
-  indexing_maps: !LinalgIndexingMapsConfig
-    static_indexing_maps:
-    - affine_map<() -> ()>
-    - affine_map<() -> ()>
-  iterator_types: []
-  assignments:
-  - !ScalarAssign
-    arg: O
-    value: !ScalarExpression
-      scalar_fn:
-        kind: unary
-        attr_name: fun
-        operands:
-        - !ScalarExpression
-          scalar_fn:
-            kind: type
-            attr_name: cast
-            type_var: U
-            operands:
-            - !ScalarExpression
-              scalar_arg: I
---- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: exp
   cpp_class_name: ExpOp
@@ -549,70 +499,6 @@ structured_op: !LinalgStructuredOpConfig
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
-metadata: !LinalgOpMetadata
-  name: elemwise_binary
-  cpp_class_name: ElemwiseBinaryOp
-  doc: |-
-    Applies the binary function fun elementwise.
-
-    Numeric casting is performed on the input operand, promoting it to the same
-    data type as the accumulator/output.
-structured_op: !LinalgStructuredOpConfig
-  args:
-  - !LinalgOperandDefConfig
-    name: lhs
-    kind: input_tensor
-    type_var: T1
-    shape_map: affine_map<() -> ()>
-  - !LinalgOperandDefConfig
-    name: rhs
-    kind: input_tensor
-    type_var: T2
-    shape_map: affine_map<() -> ()>
-  - !LinalgOperandDefConfig
-    name: O
-    kind: output_tensor
-    type_var: U
-    shape_map: affine_map<() -> ()>
-  - !LinalgOperandDefConfig
-    name: fun
-    kind: binary_fn_attr
-    default_fn: add
-  - !LinalgOperandDefConfig
-    name: cast
-    kind: type_fn_attr
-    default_fn: cast_signed
-  indexing_maps: !LinalgIndexingMapsConfig
-    static_indexing_maps:
-    - affine_map<() -> ()>
-    - affine_map<() -> ()>
-    - affine_map<() -> ()>
-  iterator_types: []
-  assignments:
-  - !ScalarAssign
-    arg: O
-    value: !ScalarExpression
-      scalar_fn:
-        kind: binary
-        attr_name: fun
-        operands:
-        - !ScalarExpression
-          scalar_fn:
-            kind: type
-            attr_name: cast
-            type_var: U
-            operands:
-            - !ScalarExpression
-              scalar_arg: lhs
-        - !ScalarExpression
-          scalar_fn:
-            kind: type
-            attr_name: cast
-            type_var: U
-            operands:
-            - !ScalarExpression
-              scalar_arg: rhs
---- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: add
   cpp_class_name: AddOp
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
index 48e724d80c926..1b359da40a291 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@@ -21,21 +21,6 @@ def copy(
     O[None] = cast(U, I[None])
 
 
-@linalg_structured_op
-def elemwise_unary(
-    I=TensorDef(T1),
-    O=TensorDef(U, output=True),
-    fun=UnaryFnAttrDef(default=UnaryFn.exp),
-    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
-):
-    """Applies the unary function fun elementwise.
-
-    Numeric casting is performed on the input operand, promoting it to the same
-    data type as the accumulator/output.
-    """
-    O[None] = fun(cast(U, I[None]))
-
-
 @linalg_structured_op
 def exp(
     I=TensorDef(T1),
@@ -192,22 +177,6 @@ def erf(
     O[None] = UnaryFn.erf(I[None])
 
 
-@linalg_structured_op
-def elemwise_binary(
-    lhs=TensorDef(T1),
-    rhs=TensorDef(T2),
-    O=TensorDef(U, output=True),
-    fun=BinaryFnAttrDef(default=BinaryFn.add),
-    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
-):
-    """Applies the binary function fun elementwise.
-
-    Numeric casting is performed on the input operand, promoting it to the same
-    data type as the accumulator/output.
-    """
-    O[None] = fun(cast(U, lhs[None]), cast(U, rhs[None]))
-
-
 @linalg_structured_op
 def add(
     lhs=TensorDef(T1),

From 33a7435b443ee9801ba9e7c7ba2f88ccfff29f72 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Fri, 4 Jul 2025 16:59:05 +0100
Subject: [PATCH 2/6] Replace elemwise_* with named ops

---
 .../generalize-named-polymorphic-ops.mlir     | 111 ------------------
 mlir/test/Dialect/Linalg/invalid.mlir         |   8 --
 mlir/test/Dialect/Linalg/library-calls.mlir   |  40 -------
 .../Dialect/Linalg/match-ops-interpreter.mlir |   6 +-
 .../Linalg/one-shot-bufferize-analysis.mlir   |  38 ------
 .../transform-op-fuse-into-containing.mlir    |  42 +++----
 .../Dialect/Linalg/transform-op-fuse.mlir     |  72 ++++++------
 .../Linalg/transform-op-generalize.mlir       |   4 +-
 8 files changed, 62 insertions(+), 259 deletions(-)
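
The mechanical mapping used throughout these test updates (a hedged
example distilled from the replacements below; %a and %b are
placeholder names):

  // Before: fun defaults to exp (unary) and add (binary).
  %0 = linalg.elemwise_unary ins(%a : tensor<?xf32>)
         outs(%b : tensor<?xf32>) -> tensor<?xf32>
  %1 = linalg.elemwise_binary ins(%0, %a : tensor<?xf32>, tensor<?xf32>)
         outs(%b : tensor<?xf32>) -> tensor<?xf32>

  // After: the defaults become explicit named ops.
  %0 = linalg.exp ins(%a : tensor<?xf32>)
         outs(%b : tensor<?xf32>) -> tensor<?xf32>
  %1 = linalg.add ins(%0, %a : tensor<?xf32>, tensor<?xf32>)
         outs(%b : tensor<?xf32>) -> tensor<?xf32>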

diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir
index bbd6e0fc8e2cc..290c6c7c36f76 100644
--- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir
@@ -434,117 +434,6 @@ func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf
 
 // -----
 
-// Verifies the default value of the fun attribute is an exp op.
-func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_exp
-// CHECK:        = math.exp
-
-// -----
-
-// Verifies the fun attribute controls the unary function used.
-func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<log>}
-                              ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_log
-// CHECK:        = math.log
-
-// -----
-
-// Verifies the fun attribute controls the unary function used.
-func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<abs>}
-                              ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_abs
-// CHECK:        = math.absf
-
-// -----
-
-// Verifies the fun attribute controls the unary function used.
-func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<ceil>}
-                              ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_ceil
-// CHECK:        = math.ceil
-
-// -----
-
-// Verifies the fun attribute controls the unary function used.
-func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<floor>}
-                              ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_floor
-// CHECK:        = math.floor
-
-// -----
-
-// Verifies the fun attribute controls the unary function used.
-func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
-                              ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_negf
-// CHECK:        = arith.negf
-
-// -----
-
-// Verifies the default value of the fun attribute is an add op.
-func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
-                              outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_add
-// CHECK:        = arith.addf
-
-// -----
-
-// Verifies the fun attribute controls the binary function used.
-func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<mul>}
-                              ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
-                              outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_mul
-// CHECK:        = arith.mulf
-
-// -----
-
-// Verifies pointwise ops support rank zero input tensors
-func.func @generalize_elemwise_rank_zero(%lhs : tensor<f32>, %rhs : tensor<f32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
-  %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<sub>}
-                              ins(%lhs, %rhs: tensor<f32>, tensor<f32>)
-                              outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
-  return %0: tensor<4x8xf32>
-}
-
-// CHECK-LABEL: @generalize_elemwise_rank_zero
-// CHECK:       linalg.generic
-// CHECK-SAME:  iterator_types = ["parallel", "parallel"]
-// CHECK:        = arith.subf
-
-// -----
-
 // Verifies the fun attribute controls the binary function used.
 func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
   %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 964681d7dcd92..da1dfc7b6a624 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -1909,14 +1909,6 @@ module {
 
 // -----
 
-func.func @elemwise_unary_invalid_mixed_types(%arg0 : tensor<?xi32>) -> tensor<?xi32> {
-  // expected-error @below {{unsupported non numeric type}}
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?xi32>) outs(%arg0 : tensor<?xi32>) -> tensor<?xi32>
-  return %0 : tensor<?xi32>
-}
-
-// -----
-
 func.func @matmul_invalid_mixed_types(%t: tensor<?xf16>, %f: vector<4xf16>)
   -> (tensor<?xf16>, vector<4xf16>)
 {
diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir
index 1fa675d8b4b68..77c9d4a911447 100644
--- a/mlir/test/Dialect/Linalg/library-calls.mlir
+++ b/mlir/test/Dialect/Linalg/library-calls.mlir
@@ -59,43 +59,3 @@ module {
     return
   }
 }
-
-
-// -----
-
-// CHECK: func.func private @linalg_elemwise_unary_negf_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
-// CHECK: func.func private @linalg_elemwise_unary_negf_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
-
-func.func @test_neg(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16xf32>, %D: memref<16xf32>) {
-  linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
-                              ins(%A: memref<16x8xf32>) outs(%B: memref<16x8xf32>)
-  linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
-                              ins(%C: memref<16xf32>) outs(%D: memref<16xf32>)
-  return
-}
-
-// -----
-
-// CHECK: func.func private @linalg_elemwise_unary_exp_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
-// CHECK: func.func private @linalg_elemwise_unary_exp_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
-
-func.func @test_exp(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16xf32>, %D: memref<16xf32>) {
-  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
-                              ins(%A: memref<16x8xf32>) outs(%B: memref<16x8xf32>)
-  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
-                              ins(%C: memref<16xf32>) outs(%D: memref<16xf32>)
-  return
-}
-
-// -----
-
-// CHECK: func.func private @linalg_elemwise_binary_add_view16x8xf32_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
-// CHECK: func.func private @linalg_elemwise_binary_add_view16xf32_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
-
-func.func @test_add(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16x8xf32>, %D: memref<16xf32>, %E: memref<16xf32>, %F: memref<16xf32>) {
-  linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-                              ins(%A, %B: memref<16x8xf32>, memref<16x8xf32>) outs(%C: memref<16x8xf32>)
-  linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-                              ins(%D, %E: memref<16xf32>, memref<16xf32>) outs(%F: memref<16xf32>)
-  return
-}
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index bfe7a07cb38a5..618ba3402ff52 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -842,15 +842,15 @@ module attributes { transform.with_named_sequence } {
     // expected-remark @below {{op result}}
     // expected-note @below {{value handle points to an op result #0}}
     // expected-remark @below {{single user}}
-    linalg.elemwise_unary {fun = #linalg.unary_fn<negf>} ins(%2 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
+    linalg.negf ins(%2 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
     // expected-remark @below {{matched result value}}
     // expected-remark @below {{op result}}
     // expected-note @below {{value handle points to an op result #0}}
-    linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ins(%3 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
+    linalg.exp ins(%3 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
     // expected-remark @below {{matched result value}}
     // expected-remark @below {{op result}}
     // expected-note @below {{value handle points to an op result #0}}
-    linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ins(%3 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
+    linalg.exp ins(%3 : tensor<42x42xf32>) outs(%0 : tensor<42x42xf32>) -> tensor<42x42xf32>
     return
   }
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
index 5b7c2baf9d84f..a0922bdfcfbe4 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
@@ -1,43 +1,5 @@
 // RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
 
-// CHECK-LABEL: @elementwise_no_conflict
-func.func @elementwise_no_conflict(%a: tensor<5xf32>,
-                                   %b: tensor<5xf32>) -> tensor<5xf32> {
-  // CHECK: linalg.elemwise_binary
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"], fun = #linalg.binary_fn<add>}
-  %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-      ins(%a, %b : tensor<5xf32>, tensor<5xf32>)
-      outs(%a : tensor<5xf32>) -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @elementwise_no_conflict_2
-func.func @elementwise_no_conflict_2(%a: tensor<5xf32>) -> tensor<5xf32> {
-  // CHECK: linalg.elemwise_binary
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"], fun = #linalg.binary_fn<add>}
-  %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-      ins(%a, %a : tensor<5xf32>, tensor<5xf32>)
-      outs(%a : tensor<5xf32>) -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @elementwise_no_conflict_3
-func.func @elementwise_no_conflict_3(%a: tensor<5xf32>) -> tensor<5xf32> {
-  %c0f = arith.constant 1.0 : f32
-  // CHECK: linalg.elemwise_binary
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "true"], fun = #linalg.binary_fn<add>}
-  %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
-      ins(%a, %c0f : tensor<5xf32>, f32)
-      outs(%a : tensor<5xf32>) -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-
-// -----
-
 func.func @not_elementwise(%a: tensor<5x6xf32>) -> tensor<5x6xf32> {
   %cst = arith.constant 5.0 : f32
   // CHECK: tensor.extract_slice
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
index 5bdb5073ee865..312468970ae6d 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -26,8 +26,8 @@ module {
       // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T2:.*]] = linalg.exp ins(%[[T1]]
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -76,8 +76,8 @@ module {
       %4 = affine.min #map2(%arg3)[%arg0]
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor<?xf32>
 
-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
-      %7 = linalg.elemwise_unary ins(%0 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T2:.*]] = linalg.exp ins(%[[INIT_TENSOR]]
+      %7 = linalg.exp ins(%0 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<64xf32>
       }
@@ -177,8 +177,8 @@ module {
       // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
       %6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T2:.*]] = linalg.exp {{.*}} outs(%[[T1]]
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -228,8 +228,8 @@ module {
       // CHECK: %[[T2:.*]] = linalg.fill {{.*}} outs(%[[T1]]
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      // CHECK: %[[T3:.*]] = linalg.elemwise_unary ins(%[[T2]] : tensor<?xf32>) outs(%[[T0]] : tensor<?xf32>)
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T3:.*]] = linalg.exp ins(%[[T2]] : tensor<?xf32>) outs(%[[T0]] : tensor<?xf32>)
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -261,7 +261,7 @@ module {
     %c2 = arith.constant 2 : index
     %c1 = arith.constant 1 : index
     %c0 = arith.constant 0 : index
-    %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<abs>} ins(%arg0 : tensor<?x?x?xf32>) outs(%arg1 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+    %0 = linalg.exp {fun = #linalg.unary_fn<abs>} ins(%arg0 : tensor<?x?x?xf32>) outs(%arg1 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
     %dim = tensor.dim %arg1, %c0 : tensor<?x?x?xf32>
     %dim_0 = tensor.dim %arg1, %c1 : tensor<?x?x?xf32>
     %dim_1 = tensor.dim %arg1, %c2 : tensor<?x?x?xf32>
@@ -272,12 +272,12 @@ module {
       %2 = scf.for %arg4 = %c0 to %dim_0 step %c1 iter_args(%arg5 = %arg3) -> (tensor<?x?x?xf32>) {
         %3 = scf.for %arg6 = %c0 to %dim_1 step %c1 iter_args(%arg7 = %arg5) -> (tensor<?x?x?xf32>) {
           // CHECK:  %[[EX1:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}}: tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          // CHECK:  linalg.elemwise_unary {fun = #linalg.unary_fn<abs>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX1]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          // CHECK:  linalg.exp {fun = #linalg.unary_fn<abs>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX1]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           // CHECK:  %[[EX2:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}} : tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          // CHECK:  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX2]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          // CHECK:  linalg.exp {fun = #linalg.unary_fn<exp>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX2]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           %extracted_slice = tensor.extract_slice %0[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<1x1x1xf32>
           %extracted_slice_2 = tensor.extract_slice %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          %4 = linalg.elemwise_unary {fun = #linalg.unary_fn<exp>} ins(%extracted_slice : tensor<1x1x1xf32>) outs(%extracted_slice_2 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          %4 = linalg.exp {fun = #linalg.unary_fn<exp>} ins(%extracted_slice : tensor<1x1x1xf32>) outs(%extracted_slice_2 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           %inserted_slice = tensor.insert_slice %4 into %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<1x1x1xf32> into tensor<?x?x?xf32>
           scf.yield %inserted_slice : tensor<?x?x?xf32>
         }
@@ -290,7 +290,7 @@ module {
 
   module attributes {transform.with_named_sequence} {
     transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-      %0 = transform.structured.match ops{["linalg.elemwise_unary"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+      %0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
       %1 = transform.structured.match ops{["scf.for"]} in %arg0 : (!transform.any_op) -> !transform.any_op
       %2:2 = transform.split_handle %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
       %3:3 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
@@ -340,8 +340,8 @@ module {
       // CHECK: %[[T1:.*]]:2 = linalg.generic {{.*}} ins(%[[T0]]
       %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T2:.*]] = linalg.exp ins(%[[T1]]#0
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -376,8 +376,8 @@ module {
       %2 = tensor.extract_slice %0[%i][1][1] : tensor<2xf32> to tensor<1xf32>
       %3 = tensor.extract_slice %arg1[%i][1][1] : tensor<2xf32> to tensor<1xf32>
       // CHECK: %[[FUSED:.+]] = linalg.fill
-      // CHECK: elemwise_unary ins(%[[FUSED]]
-      %4 = linalg.elemwise_unary ins(%2 : tensor<1xf32>) outs(%3 : tensor<1xf32>) -> tensor<1xf32>
+      // CHECK: exp ins(%[[FUSED]]
+      %4 = linalg.exp ins(%2 : tensor<1xf32>) outs(%3 : tensor<1xf32>) -> tensor<1xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %4 into %arg1[%i][1][1] : tensor<1xf32> into tensor<2xf32>
       }
@@ -446,7 +446,7 @@ module {
       // CHECK: %[[T1:.*]]:2 = linalg.generic {{.*}}
       %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         // CHECK: tensor.parallel_insert_slice %[[T1]]#0 into %[[ARG7]][%[[I0]]] [%[[I1]]] [1] : tensor<?xf32> into tensor<?xf32>
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
@@ -515,7 +515,7 @@ module {
       // CHECK: %[[T1:.*]] = linalg.generic {{.*}}
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         // CHECK: tensor.parallel_insert_slice %[[T1]] into %[[ARG7]][%[[I0]]] [%[[I1]]] [1] : tensor<?xf32> into tensor<?xf32>
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
@@ -582,7 +582,7 @@ module {
       // CHECK: %[[T1:.*]] = linalg.generic {{.*}}
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      %7 = linalg.elemwise_unary ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         // CHECK: tensor.parallel_insert_slice %[[T1]] into %[[ARG7]][%[[I0]]] [%[[I1]]] [1] : tensor<?xf32> into tensor<?xf32>
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
@@ -658,7 +658,7 @@ module {
       // CHECK: %[[T2:.*]] = linalg.generic {{.*}}
       %7 = tensor.extract_slice %1[%4] [%5] [1] : tensor<?xf32> to tensor<?xf32>
 
-      %8 = linalg.elemwise_unary ins(%7 : tensor<?xf32>) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
+      %8 = linalg.exp ins(%7 : tensor<?xf32>) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         // CHECK: tensor.parallel_insert_slice %[[T2]] into %[[ARG7]][%[[I0]]] [%[[I1]]] [1] : tensor<?xf32> into tensor<?xf32>
         tensor.parallel_insert_slice %8 into %o[%2] [%5] [1] : tensor<?xf32> into tensor<?xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 962858076db93..9a44f95afb586 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -5,19 +5,19 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
 
   //     CHECK: %[[RES:.*]] = scf.for
   //     CHECK:    scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: return %[[RES]]
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?x?xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
-  %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
+  %1 = linalg.add ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_binary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
       transform.yield
@@ -31,23 +31,23 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
 
   //     CHECK: %[[PARTIAL_RES:.*]] = scf.for
   //     CHECK:     scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: %[[RES:.*]] = scf.for {{.*}}%[[PARTIAL_RES]]
   //     CHECK:     scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: return %[[RES]]
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?x?xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
-  %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
+  %1 = linalg.add ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_binary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
       : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
     transform.loop.peel %loops#0 : (!transform.op<"scf.for">) -> (!transform.any_op, !transform.any_op)
@@ -107,20 +107,20 @@ module attributes {transform.with_named_sequence} {
 // CHECK:         %[[RES:.*]] = scf.for
 // CHECK:           scf.for
 // CHECK:             linalg.unpack
-// CHECK:             linalg.elemwise_unary
+// CHECK:             linalg.exp
 // CHECK:         return %[[RES]]
 func.func @unpack_elemwise(%arg0: tensor<16x48x8x8xf32>, %arg1: tensor<128x384xf32>) -> tensor<128x384xf32> {
   %0 = tensor.empty() : tensor<128x384xf32>
   %1 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
       : tensor<16x48x8x8xf32> -> tensor<128x384xf32>
-  %2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>)
+  %2 = linalg.exp ins(%1: tensor<128x384xf32>)
                              outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
   return %2 : tensor<128x384xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_unary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [16, 32], tile_interchange = [0, 1]}
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
       transform.yield
@@ -133,20 +133,20 @@ module attributes {transform.with_named_sequence} {
 // CHECK:         %[[RES:.*]] = scf.for
 // CHECK:           scf.for
 // CHECK:             linalg.pack
-// CHECK:             linalg.elemwise_unary
+// CHECK:             linalg.exp
 // CHECK:         return %[[RES]]
 func.func @pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> {
   %0 = tensor.empty() : tensor<16x48x8x8xf32>
   %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
       : tensor<128x384xf32> -> tensor<16x48x8x8xf32>
-  %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>)
+  %2 = linalg.exp ins(%1: tensor<16x48x8x8xf32>)
                              outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32>
   return %2 : tensor<16x48x8x8xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_unary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [3, 5, 0, 0]}
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
       transform.yield
@@ -159,20 +159,20 @@ module attributes {transform.with_named_sequence} {
 // CHECK:         linalg.pack
 // CHECK:         %[[RES:.*]] = scf.for
 // CHECK:           scf.for
-// CHECK:             linalg.elemwise_unary
+// CHECK:             linalg.exp
 // CHECK:         return %[[RES]]
 func.func @nofuse_pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> {
   %0 = tensor.empty() : tensor<16x48x8x8xf32>
   %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
       : tensor<128x384xf32> -> tensor<16x48x8x8xf32>
-  %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>)
+  %2 = linalg.exp ins(%1: tensor<16x48x8x8xf32>)
                              outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32>
   return %2 : tensor<16x48x8x8xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_unary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:3 = transform.structured.fuse %0 {tile_sizes = [3, 5, 2, 0]}
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
       transform.yield
@@ -186,24 +186,24 @@ func.func @fuse_through_slice(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) ->
 
   //     CHECK: %[[RES:.*]] = scf.for
   //     CHECK:     scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: return %[[RES]]
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?x?xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
                              outs(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32>
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %dim0 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
   %dim1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
   %1 = tensor.extract_slice %0 [1, 1] [%dim0, %dim1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %2 = linalg.elemwise_binary ins(%1, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+  %2 = linalg.add ins(%1, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %2 : tensor<?x?xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_binary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
       : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
     transform.yield
@@ -217,10 +217,10 @@ func.func @fuse_through_slice_and_cast_chain(%arg0: tensor<100x100xf32>, %arg1:
 
   //     CHECK: %[[RES:.*]] = scf.for
   //     CHECK:     scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: return %[[RES]]
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<100x100xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<100x100xf32>)
                              outs(%arg0: tensor<100x100xf32>) -> tensor<100x100xf32>
   %1 = tensor.cast %0 : tensor<100x100xf32> to tensor<100x?xf32>
   %2 = tensor.extract_slice %1 [1, 1] [98, 98] [1, 1] : tensor<100x?xf32> to tensor<98x98xf32>
@@ -230,14 +230,14 @@ func.func @fuse_through_slice_and_cast_chain(%arg0: tensor<100x100xf32>, %arg1:
   %dim0 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
   %dim1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
   %4 = tensor.extract_slice %3 [1, 1] [%dim0, %dim1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %5 = linalg.elemwise_binary ins(%4, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+  %5 = linalg.add ins(%4, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %5 : tensor<?x?xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_binary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
       : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
     transform.yield
@@ -253,8 +253,8 @@ func.func @fuse_unrelated_slices(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>)
   //     CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[SLICE1]]
   //     CHECK: %[[RES:.*]] = scf.for
   //     CHECK:     scf.for
-  //     CHECK:       linalg.elemwise_unary
-  //     CHECK:       linalg.elemwise_binary
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
   //     CHECK: return %[[RES]], %[[SLICE2]]
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -262,17 +262,17 @@ func.func @fuse_unrelated_slices(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>)
   %dim1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
   %slice1 = tensor.extract_slice %arg0 [1, 1] [%dim0, %dim1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   %slice2 = tensor.extract_slice %slice1 [1, 1] [10, 10] [1, 1] : tensor<?x?xf32> to tensor<10x10xf32>
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?x?xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
                              outs(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32>
   %1 = tensor.extract_slice %0 [1, 1] [%dim0, %dim1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %2 = linalg.elemwise_binary ins(%1, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+  %2 = linalg.add ins(%1, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %2, %slice2 : tensor<?x?xf32>, tensor<10x10xf32>
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.elemwise_binary"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
       : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
     transform.yield
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
index a0aa33c072dd4..331c9c0fbbfd5 100644
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -3,9 +3,9 @@
 // CHECK-LABEL: func.func @generalize_unary
 func.func @generalize_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
 
-  // CHECK-NOT:   linalg.elemwise_unary
+  // CHECK-NOT:   linalg.exp
   //     CHECK:   linalg.generic
-  %0 = linalg.elemwise_unary ins(%arg0 : tensor<?x?xf32>)
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
                              outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }

From cc10325d3b2114f4bad933673bd38d0dac3be2a7 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Fri, 4 Jul 2025 17:02:25 +0100
Subject: [PATCH 3/6] Update more IR tests

---
 mlir/test/Dialect/SCF/canonicalize.mlir                   | 8 ++++----
 mlir/test/Integration/Dialect/Transform/match_matmul.mlir | 2 +-
 .../TilingInterface/tile-and-fuse-consumer.mlir           | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)
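
The same mapping applies when an explicit fun attribute is present: the
attribute value selects the named op. For instance, from
match_matmul.mlir below:

  // Before:
  %real_lhs = linalg.elemwise_binary { fun = #linalg.binary_fn<mul> }
      ins(%lhs, %lhs : tensor<10x20xf32>, tensor<10x20xf32>)
      outs(%lhs : tensor<10x20xf32>) -> tensor<10x20xf32>

  // After:
  %real_lhs = linalg.mul
      ins(%lhs, %lhs : tensor<10x20xf32>, tensor<10x20xf32>)
      outs(%lhs : tensor<10x20xf32>) -> tensor<10x20xf32>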

diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 5e32a3a78c032..8ba8013d008a0 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -1811,7 +1811,7 @@ module {
       %4 = affine.min #map2(%arg3)[%dim, %arg0]
       %extracted_slice0 = tensor.extract_slice %arg4[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
       %extracted_slice1 = tensor.extract_slice %arg5[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
-      %5 = linalg.elemwise_unary ins(%extracted_slice0 : tensor<?xf32>) outs(%extracted_slice1 : tensor<?xf32>) -> tensor<?xf32>
+      %5 = linalg.exp ins(%extracted_slice0 : tensor<?xf32>) outs(%extracted_slice1 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %5 into %arg5[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -1825,7 +1825,7 @@ module {
 //  CHECK-SAME:                       shared_outs(%[[ITER_ARG_5:.*]] = %[[ARG2]]) -> (tensor<?xf32>) {
 //       CHECK:      %[[OPERAND0:.*]] = tensor.extract_slice %[[ARG1]]
 //       CHECK:      %[[OPERAND1:.*]] = tensor.extract_slice %[[ITER_ARG_5]]
-//       CHECK:      %[[ELEM:.*]] = linalg.elemwise_unary ins(%[[OPERAND0]] : tensor<?xf32>) outs(%[[OPERAND1]] : tensor<?xf32>) -> tensor<?xf32>
+//       CHECK:      %[[ELEM:.*]] = linalg.exp ins(%[[OPERAND0]] : tensor<?xf32>) outs(%[[OPERAND1]] : tensor<?xf32>) -> tensor<?xf32>
 //       CHECK:      scf.forall.in_parallel {
 //  CHECK-NEXT:         tensor.parallel_insert_slice %[[ELEM]] into %[[ITER_ARG_5]]
 //  CHECK-NEXT:      }
@@ -1851,7 +1851,7 @@ module {
       %extracted_slice_0 = tensor.extract_slice %arg6[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
       %extracted_slice_1 = tensor.extract_slice %arg7[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
       %extracted_slice_2 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
-      %5 = linalg.elemwise_unary ins(%extracted_slice : tensor<?xf32>) outs(%extracted_slice_1 : tensor<?xf32>) -> tensor<?xf32>
+      %5 = linalg.exp ins(%extracted_slice : tensor<?xf32>) outs(%extracted_slice_1 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %5 into %arg6[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
         tensor.parallel_insert_slice %extracted_slice into %arg5[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
@@ -1868,7 +1868,7 @@ module {
 //  CHECK-SAME:                       shared_outs(%[[ITER_ARG_6:.*]] = %[[ARG2]]) -> (tensor<?xf32>) {
 //       CHECK:      %[[OPERAND0:.*]] = tensor.extract_slice %[[ARG1]]
 //       CHECK:      %[[OPERAND1:.*]] = tensor.extract_slice %[[ARG3]]
-//       CHECK:      %[[ELEM:.*]] = linalg.elemwise_unary ins(%[[OPERAND0]] : tensor<?xf32>) outs(%[[OPERAND1]] : tensor<?xf32>) -> tensor<?xf32>
+//       CHECK:      %[[ELEM:.*]] = linalg.exp ins(%[[OPERAND0]] : tensor<?xf32>) outs(%[[OPERAND1]] : tensor<?xf32>) -> tensor<?xf32>
 //       CHECK:      scf.forall.in_parallel {
 //  CHECK-NEXT:         tensor.parallel_insert_slice %[[ELEM]] into %[[ITER_ARG_6]]
 //  CHECK-NEXT:      }
diff --git a/mlir/test/Integration/Dialect/Transform/match_matmul.mlir b/mlir/test/Integration/Dialect/Transform/match_matmul.mlir
index 7b695cb027252..a374d9a611258 100644
--- a/mlir/test/Integration/Dialect/Transform/match_matmul.mlir
+++ b/mlir/test/Integration/Dialect/Transform/match_matmul.mlir
@@ -69,7 +69,7 @@ func.func @matmul_with_extra_ops_in_func(%lhs: tensor<10x20xf32>, %rhs: tensor<2
   // expected-remark @below {{fill}}
   %fill = linalg.fill ins(%cst : f64) outs(%empty : tensor<10x15xf32>) -> tensor<10x15xf32>
 
-  %real_lhs = linalg.elemwise_binary { fun = #linalg.binary_fn<mul> } 
+  %real_lhs = linalg.mul
     ins(%lhs, %lhs : tensor<10x20xf32>, tensor<10x20xf32>) outs(%lhs : tensor<10x20xf32>) -> tensor<10x20xf32>
 
   // expected-remark @below {{matmul}}
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
index 0f69875d596f1..d09373bdb3f14 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
@@ -19,7 +19,7 @@ module {
     }
     %in_operand_2 = tensor.empty() : tensor<64xf32>
     %out_operand_3 = tensor.empty() : tensor<64xf32>
-    %2 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>} ins(%1#1, %in_operand_2 : tensor<64xf32>, tensor<64xf32>) outs(%out_operand_3 : tensor<64xf32>) -> tensor<64xf32>
+    %2 = linalg.add ins(%1#1, %in_operand_2 : tensor<64xf32>, tensor<64xf32>) outs(%out_operand_3 : tensor<64xf32>) -> tensor<64xf32>
     return %2 : tensor<64xf32>
   }
 }
@@ -50,7 +50,7 @@ module attributes {transform.with_named_sequence} {
 //      CHECK:      %[[INSERT_MAT:.*]] = tensor.insert_slice %[[MAT_OUT]] into %[[FIRST_OUT_ARG]][%[[IV]]] [32] [1]
 //      CHECK:      %[[SLICE_OPERAND2:.*]] = tensor.extract_slice %0[%[[IV]]] [32] [1]
 //      CHECK:      %[[SLICE_OUT:.*]] = tensor.extract_slice %[[ELEM_OUT_ARG]][%[[IV]]] [32] [1]
-//      CHECK:      %[[ELEM_OUT:.*]] = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+//      CHECK:      %[[ELEM_OUT:.*]] = linalg.add
 // CHECK-SAME:              ins(%[[MAT_OUT]], %[[SLICE_OPERAND2]] :
 // CHECK-SAME:              outs(%[[SLICE_OUT]] :
 //      CHECK:      %[[INSERT_ELEM:.*]] = tensor.insert_slice %[[ELEM_OUT]] into %[[ELEM_OUT_ARG]][%[[IV]]] [32] [1]
@@ -76,7 +76,7 @@ module {
     }
     %in_operand_2 = tensor.empty() : tensor<64x64xf32>
     %out_operand_3 = tensor.empty() : tensor<64x64xf32>
-    %2 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>} ins(%1#1, %in_operand_2 : tensor<64x64xf32>, tensor<64x64xf32>) outs(%out_operand_3 : tensor<64x64xf32>) -> tensor<64x64xf32>
+    %2 = linalg.add ins(%1#1, %in_operand_2 : tensor<64x64xf32>, tensor<64x64xf32>) outs(%out_operand_3 : tensor<64x64xf32>) -> tensor<64x64xf32>
     return %2 : tensor<64x64xf32>
   }
 }
@@ -109,7 +109,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME:              outs(%[[MAT_OUT_SLICE]] :
 //      CHECK:      %[[SLICE_OPERAND2:.*]] = tensor.extract_slice %[[OUT_INIT]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1]
 //      CHECK:      %[[SLICE_OUT:.*]] = tensor.extract_slice %[[ELEM_OUT_ARG]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1]
-//      CHECK:      %[[ELEM_OUT:.*]] = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+//      CHECK:      %[[ELEM_OUT:.*]] = linalg.add
 // CHECK-SAME:              ins(%[[MAT_OUT]], %[[SLICE_OPERAND2]] :
 // CHECK-SAME:              outs(%[[SLICE_OUT]] :
 //      CHECK:      scf.forall.in_parallel {

From 59ee2dd4956117c65183f818f2d907eb91e13f04 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Fri, 4 Jul 2025 17:08:06 +0100
Subject: [PATCH 4/6] Fix Python tests

---
 mlir/test/python/dialects/linalg/ops.py       |  36 ------
 .../integration/dialects/linalg/opsrun.py     | 121 ------------------
 2 files changed, 157 deletions(-)
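
These Python tests are deleted rather than migrated: my reading of the
removed IR is that the builders exercised rank-0 inputs broadcast
against rank-2 outputs, which the surviving same-rank named ops
(linalg.exp, linalg.add, ...) do not express (compare the
generalize_elemwise_rank_zero test deleted in patch 2). Roughly, with
attributes approximate, the deleted builder calls produced:

  // linalg.elemwise_unary(lhs, outs=[out]) with %lhs : memref<f32>:
  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
      ins(%lhs : memref<f32>) outs(%out : memref<4x8xf32>)
  // linalg.elemwise_binary(out, rhs, outs=[out]):
  linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
      ins(%out, %rhs : memref<4x8xf32>, memref<4x8xf32>)
      outs(%out : memref<4x8xf32>)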

diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
index f1e2afa0f2408..709a1d2424f35 100644
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -49,42 +49,6 @@ def fill_buffer(out):
     print(module)
 
 
-# CHECK-LABEL: TEST: testNamedStructuredOpCustomForm
-@run
-def testNamedStructuredOpCustomForm():
-    with Context() as ctx, Location.unknown():
-        module = Module.create()
-        f32 = F32Type.get()
-        with InsertionPoint(module.body):
-
-            @func.FuncOp.from_py_func(
-                RankedTensorType.get((4, 8), f32), RankedTensorType.get((4, 8), f32)
-            )
-            def named_form(lhs, rhs):
-                init_result = tensor.EmptyOp([4, 8], f32)
-                # Check for the named form with custom format
-                #      CHECK: linalg.elemwise_unary
-                # CHECK-SAME:    cast = #linalg.type_fn<cast_signed>
-                # CHECK-SAME:    fun = #linalg.unary_fn<exp>
-                # CHECK-SAME:    ins(%{{.*}} : tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
-                unary_result = linalg.elemwise_unary(lhs, outs=[init_result.result])
-                #      CHECK: linalg.elemwise_binary
-                # CHECK-SAME:    cast = #linalg.type_fn<cast_unsigned>
-                # CHECK-SAME:    fun = #linalg.binary_fn<mul>
-                # CHECK-SAME:    ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
-                #      CHECK: return
-                binary_result = linalg.elemwise_binary(
-                    lhs,
-                    rhs,
-                    outs=[init_result.result],
-                    fun=BinaryFn.mul,
-                    cast=TypeFn.cast_unsigned,
-                )
-                return unary_result, binary_result
-
-    print(module)
-
-
 # CHECK-LABEL: TEST: testIdentityRegionOps
 @run
 def testIdentityRegionOps():
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
index 69f97e15e139d..8f202318146ee 100644
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -19,37 +19,6 @@ def log(*args):
     sys.stderr.flush()
 
 
-elemwise_boiler = """
-func.func @main() -> f32 attributes {llvm.emit_c_interface} {
-  %v0 = arith.constant 0.0 : f32
-  %v1 = arith.constant 1.0 : f32
-  %v2 = arith.constant 2.0 : f32
-
-  %lhs = memref.alloc() : memref<f32>
-  %rhs = memref.alloc() : memref<4x8xf32>
-  %O0 = memref.alloc() : memref<4x8xf32>
-  %O1 = memref.alloc() : memref<4x8xf32>
-  linalg.fill ins(%v1 : f32) outs(%lhs : memref<f32>)
-  linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
-
-  call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0) :
-    (memref<f32>, memref<4x8xf32>, memref<4x8xf32>) -> ()
-  call @elemwise_log_mul_on_buffers(%lhs, %rhs, %O1) :
-    (memref<f32>, memref<4x8xf32>, memref<4x8xf32>) -> ()
-
-  %c0 = arith.constant 0 : index
-  %res0 = memref.load %O0[%c0, %c0] : memref<4x8xf32>
-  %res1 = memref.load %O1[%c0, %c0] : memref<4x8xf32>
-
-  %0 = arith.addf %res0, %res1 : f32
-
-  // TODO: FFI-based solution to allow testing and printing with python code.
-  return %0 : f32
-}
-"""
-
 fill_boiler = """
 func.func @main() -> i32 attributes {llvm.emit_c_interface} {
   %O0 = memref.alloc() : memref<i32>
@@ -177,96 +146,6 @@ def transform(module, boilerplate):
     return mod
 
 
-def test_elemwise_builtin():
-    with Context() as ctx, Location.unknown():
-        module = Module.create()
-        f32 = F32Type.get()
-        i8 = IntegerType.get_signless(8)
-        with InsertionPoint(module.body):
-
-            @func.FuncOp.from_py_func(
-                MemRefType.get((), f32),
-                MemRefType.get((4, 8), f32),
-                MemRefType.get((4, 8), f32),
-            )
-            def elemwise_exp_add_on_buffers(lhs, rhs, out):
-                linalg.elemwise_unary(lhs, outs=[out])
-                linalg.elemwise_binary(out, rhs, outs=[out])
-
-            @func.FuncOp.from_py_func(
-                MemRefType.get((), f32),
-                MemRefType.get((4, 8), f32),
-                MemRefType.get((4, 8), f32),
-            )
-            def elemwise_log_mul_on_buffers(lhs, rhs, out):
-                linalg.elemwise_unary(lhs, outs=[out], fun=UnaryFn.log)
-                linalg.elemwise_binary(out, rhs, outs=[out], fun=BinaryFn.mul)
-
-        execution_engine = ExecutionEngine(transform(module, elemwise_boiler))
-
-        # TODO: FFI-based solution to allow testing and printing with python code.
-        # Prepare arguments: one result f32.
-        # Arguments must be passed as pointers.
-        c_float_p = ctypes.c_float * 1
-        res = c_float_p(-1.0)
-        execution_engine.invoke("main", res)
-
-        log("RESULT: ", res[0])
-        # elemwise_exp_add_on_buffers: exp(1.0) + 2.0 = 4.71828182846
-        # elemwise_log_mul_on_buffers: log(1.0) * 2.0 = 0.0
-        # CHECK: RESULT: 4.71828
-
-
-test_elemwise_builtin()
-
-
-def test_elemwise_generic():
-    with Context() as ctx, Location.unknown():
-        module = Module.create()
-        f32 = F32Type.get()
-        i8 = IntegerType.get_signless(8)
-        with InsertionPoint(module.body):
-
-            @func.FuncOp.from_py_func(
-                MemRefType.get((), f32),
-                MemRefType.get((4, 8), f32),
-                MemRefType.get((4, 8), f32),
-            )
-            def elemwise_exp_add_on_buffers(lhs, rhs, out):
-                linalg.elemwise_unary(lhs, outs=[out], emit_generic=True)
-                linalg.elemwise_binary(out, rhs, outs=[out], emit_generic=True)
-
-            @func.FuncOp.from_py_func(
-                MemRefType.get((), f32),
-                MemRefType.get((4, 8), f32),
-                MemRefType.get((4, 8), f32),
-            )
-            def elemwise_log_mul_on_buffers(lhs, rhs, out):
-                linalg.elemwise_unary(
-                    lhs, outs=[out], fun=UnaryFn.log, emit_generic=True
-                )
-                linalg.elemwise_binary(
-                    out, rhs, outs=[out], fun=BinaryFn.mul, emit_generic=True
-                )
-
-        execution_engine = ExecutionEngine(transform(module, elemwise_boiler))
-
-        # TODO: FFI-based solution to allow testing and printing with python code.
-        # Prepare arguments: one result f32.
-        # Arguments must be passed as pointers.
-        c_float_p = ctypes.c_float * 1
-        res = c_float_p(-1.0)
-        execution_engine.invoke("main", res)
-
-        log("RESULT: ", res[0])
-        # elemwise_exp_add_on_buffers: exp(1.0) + 2.0 = 4.71828182846
-        # elemwise_log_mul_on_buffers: log(1.0) * 2.0 = 0.0
-        # CHECK: RESULT: 4.71828
-
-
-test_elemwise_generic()
-
-
 def test_fill_builtin():
     with Context() as ctx, Location.unknown():
         module = Module.create()

>From a64cf5c73fe6ee79222a22db4886f306c5ebc648 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Sat, 5 Jul 2025 17:59:56 +0100
Subject: [PATCH 5/6] Use linalg.elementwise and fix fuse test to match
 previous behaviour

---
 .../Linalg/transform-op-fuse-into-containing.mlir  | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
index 312468970ae6d..e5216089692b4 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -26,8 +26,8 @@ module {
       // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor<?xf32> to tensor<?xf32>
 
-      // CHECK: %[[T2:.*]] = linalg.exp ins(%[[T1]]
-      %7 = linalg.exp ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
+      // CHECK: %[[T2:.*]] = linalg.elementwise kind=#linalg.elementwise_kind<exp> ins(%[[T1]]
+      %7 = linalg.elementwise kind=#linalg.elementwise_kind<exp> ins(%6 : tensor<?xf32>) outs(%5 : tensor<?xf32>) -> tensor<?xf32>
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor<?xf32> into tensor<?xf32>
       }
@@ -261,7 +261,7 @@ module {
     %c2 = arith.constant 2 : index
     %c1 = arith.constant 1 : index
     %c0 = arith.constant 0 : index
-    %0 = linalg.exp {fun = #linalg.unary_fn<abs>} ins(%arg0 : tensor<?x?x?xf32>) outs(%arg1 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+    %0 = linalg.elementwise kind=#linalg.elementwise_kind<abs> ins(%arg0 : tensor<?x?x?xf32>) outs(%arg1 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
     %dim = tensor.dim %arg1, %c0 : tensor<?x?x?xf32>
     %dim_0 = tensor.dim %arg1, %c1 : tensor<?x?x?xf32>
     %dim_1 = tensor.dim %arg1, %c2 : tensor<?x?x?xf32>
@@ -272,12 +272,12 @@ module {
       %2 = scf.for %arg4 = %c0 to %dim_0 step %c1 iter_args(%arg5 = %arg3) -> (tensor<?x?x?xf32>) {
         %3 = scf.for %arg6 = %c0 to %dim_1 step %c1 iter_args(%arg7 = %arg5) -> (tensor<?x?x?xf32>) {
           // CHECK:  %[[EX1:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}}: tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          // CHECK:  linalg.exp {fun = #linalg.unary_fn<abs>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX1]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          // CHECK:  linalg.elementwise kind=#linalg.elementwise_kind<abs> ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX1]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           // CHECK:  %[[EX2:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}} : tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          // CHECK:  linalg.exp {fun = #linalg.unary_fn<exp>} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX2]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          // CHECK:  linalg.elementwise kind=#linalg.elementwise_kind<exp> ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX2]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           %extracted_slice = tensor.extract_slice %0[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<1x1x1xf32>
           %extracted_slice_2 = tensor.extract_slice %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<?x?x?xf32> to tensor<1x1x1xf32>
-          %4 = linalg.exp {fun = #linalg.unary_fn<exp>} ins(%extracted_slice : tensor<1x1x1xf32>) outs(%extracted_slice_2 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
+          %4 = linalg.elementwise kind=#linalg.elementwise_kind<exp> ins(%extracted_slice : tensor<1x1x1xf32>) outs(%extracted_slice_2 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
           %inserted_slice = tensor.insert_slice %4 into %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<1x1x1xf32> into tensor<?x?x?xf32>
           scf.yield %inserted_slice : tensor<?x?x?xf32>
         }
@@ -290,7 +290,7 @@ module {
 
   module attributes {transform.with_named_sequence} {
     transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-      %0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+      %0 = transform.structured.match ops{["linalg.elementwise"]} in %arg0 : (!transform.any_op) -> !transform.any_op
       %1 = transform.structured.match ops{["scf.for"]} in %arg0 : (!transform.any_op) -> !transform.any_op
       %2:2 = transform.split_handle %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
       %3:3 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)

>From 2d6ccc8d1d69f8b7f333cc2454f8fdf6bb98d673 Mon Sep 17 00:00:00 2001
From: Renato Golin <rengolin at systemcall.eu>
Date: Sat, 5 Jul 2025 18:43:38 +0100
Subject: [PATCH 6/6] Move MLIR transform examples to linalg.elementwise

---
 mlir/docs/Tutorials/transform/Ch1.md          | 44 +++++++++----------
 mlir/docs/Tutorials/transform/Ch2.md          |  4 +-
 mlir/docs/Tutorials/transform/Ch4.md          | 18 ++++----
 mlir/docs/Tutorials/transform/ChH.md          |  2 +-
 .../transform/Ch1/invalidation-1.mlir         | 22 +++++-----
 .../transform/Ch1/invalidation-2.mlir         | 14 +++---
 .../test/Examples/transform/Ch1/sequence.mlir | 20 ++++-----
 .../test/Examples/transform/Ch2/sequence.mlir | 18 ++++----
 .../test/Examples/transform/Ch3/sequence.mlir | 18 ++++----
 .../test/Examples/transform/Ch4/multiple.mlir | 20 ++++-----
 .../test/Examples/transform/Ch4/sequence.mlir | 14 +++---
 11 files changed, 97 insertions(+), 97 deletions(-)

diff --git a/mlir/docs/Tutorials/transform/Ch1.md b/mlir/docs/Tutorials/transform/Ch1.md
index b0fdf085854c7..7463a9afbe97e 100644
--- a/mlir/docs/Tutorials/transform/Ch1.md
+++ b/mlir/docs/Tutorials/transform/Ch1.md
@@ -19,13 +19,13 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
   %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
     ins(%biased, %c0f : tensor<512x512xf32>, f32)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
@@ -41,7 +41,7 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">):
+      %arg2: !transform.op<"linalg.elementwise">):
     transform.yield
   }
 }
@@ -72,11 +72,11 @@ To check or debug a transform sequence, it is possible to print various entities
 transform.sequence failures(propagate) {
 ^bb0(%arg0: !transform.any_op,
      %arg1: !transform.op<"linalg.matmul">,
-     %arg2: !transform.op<"linalg.elemwise_binary">):
+     %arg2: !transform.op<"linalg.elementwise">):
   transform.debug.emit_remark_at %arg1, "matmul"
       : !transform.op<"linalg.matmul">
   transform.debug.emit_remark_at %arg2, "elemwise_binaries"
-      : !transform.op<"linalg.elemwise_binary">
+      : !transform.op<"linalg.elementwise">
   transform.yield
 }
 ```
@@ -89,10 +89,10 @@ Since we don’t want to recompile the compiler every time we change a transform
 ```sh
 $ mlir-opt sequence.mlir --pass-pipeline="
     builtin.module(transform-interpreter{
-        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary})"
+        debug-bind-trailing-args=linalg.matmul,linalg.elementwise})"
 ```
 
-The `sequence.mlir` file contains _both_ the payload IR function _and_ the transform IR sequence nested in the same module. The transform interpreter pass will apply the `@__transform_main` named sequence to the anchor operation of the pass. In our case, we also asked the interpreter pass to associate the two extra arguments of the top-level sequence with all `linalg.matmul` and `linalg.elemwise_binary` payload operations through the respective pass options. Running this pass results in the expected remarks:
+The `sequence.mlir` file contains _both_ the payload IR function _and_ the transform IR sequence nested in the same module. The transform interpreter pass will apply the `@__transform_main` named sequence to the anchor operation of the pass. In our case, we also asked the interpreter pass to associate the two extra arguments of the top-level sequence with all `linalg.matmul` and `linalg.elementwise` payload operations through the respective pass options. Running this pass results in the expected remarks:
 
 ```sh
 sequence.mlir:7:13: remark: matmul
@@ -100,13 +100,13 @@ sequence.mlir:7:13: remark: matmul
             ^
 sequence.mlir:7:13: note: see current operation: %0 = linalg.matmul ins(%arg0, %arg1 : tensor<512x512xf32>, tensor<512x512xf32>) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
 sequence.mlir:10:13: remark: elemwise_binaries
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
             ^
-sequence.mlir:10:13: note: see current operation: %1 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>} ins(%0, %arg2 : tensor<512x512xf32>, tensor<512x512xf32>) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
+sequence.mlir:10:13: note: see current operation: %1 = linalg.elementwise kind=#linalg.elementwise_kind<add> ins(%0, %arg2 : tensor<512x512xf32>, tensor<512x512xf32>) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
 sequence.mlir:14:13: remark: elemwise_binaries
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
             ^
-sequence.mlir:14:13: note: see current operation: %2 = linalg.elemwise_binary {fun = #linalg.binary_fn<max_signed>} ins(%1, %cst : tensor<512x512xf32>, f32) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
+sequence.mlir:14:13: note: see current operation: %2 = linalg.elementwise kind=#linalg.elementwise_kind<max_signed> ins(%1, %cst : tensor<512x512xf32>, f32) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
 ```
 
 Note that `%arg2` is associated with both elementwise payload operations. Any handle is associated with a list of entities. Individual transformations may or may not care about the order of elements in that list.
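As a minimal sketch reusing the names from the sequence above, `transform.split_handle` turns such a multi-op handle into one handle per payload operation, in payload order:

```mlir
%add, %max = transform.split_handle %arg2
    : (!transform.op<"linalg.elementwise">)
    -> (!transform.any_op, !transform.any_op)
// %add is now associated only with the first elementwise op and %max with
// the second, so each can be targeted independently.
transform.debug.emit_remark_at %add, "add only" : !transform.any_op
```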
@@ -121,7 +121,7 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // The actual tiling transformation takes tile sizes as attributes.
     %loop, %tiled = transform.structured.tile_using_forall %arg1
                     tile_sizes [4, 32]
@@ -163,10 +163,10 @@ func.func @fc_relu(%arg0: tensor<512x512xf32>,
           : tensor<4x32xf32> into tensor<512x512xf32>
     }
   }
-  %1 = linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+  %1 = linalg.elementwise kind=#linalg.elementwise_kind<add>>
     ins(%0, %arg2 : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
-  %2 = linalg.elemwise_binary {fun = #linalg.binary_fn<max_signed>}
+  %2 = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>>
     ins(%1, %cst : tensor<512x512xf32>, f32)
     outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32>
   return %2 : tensor<512x512xf32>
@@ -185,7 +185,7 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // The actual tiling transformation takes tile sizes as attributes.
     %loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
         : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
@@ -219,7 +219,7 @@ module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // We can cast one type to another as long as operations are compatible
     // with both types. This creates "aliasing" handles.
     %casted = transform.cast %arg1 : !transform.op<"linalg.matmul">
@@ -248,7 +248,7 @@ sequence.mlir:28:3: error: op uses a handle invalidated by a previously executed
   transform.debug.emit_remark_at %matmul, "elemwise_binaries" : !transform.op<"linalg.matmul">
   ^
 sequence.mlir:21:29: note: handle to invalidated ops
-^bb0(%root: !transform.any_op, %matmul: !transform.op<"linalg.matmul">, %elemwise: !transform.op<"linalg.elemwise_binary">):
+^bb0(%root: !transform.any_op, %matmul: !transform.op<"linalg.matmul">, %elemwise: !transform.op<"linalg.elementwise">):
                             ^
 sequence.mlir:27:19: note: invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them
   %loop, %tiled = transform.structured.tile_using_forall %mm tile_sizes [4, 32]
@@ -263,12 +263,12 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
     %add, %max = transform.split_handle %arg2
-        : (!transform.op<"linalg.elemwise_binary">)
+        : (!transform.op<"linalg.elementwise">)
         -> (!transform.any_op, !transform.any_op)
 
     // The actual tiling transformation takes tile sizes as attributes. It
@@ -308,12 +308,12 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
     %add, %max = transform.split_handle %arg2
-        : (!transform.op<"linalg.elemwise_binary">)
+        : (!transform.op<"linalg.elementwise">)
           -> (!transform.any_op, !transform.any_op)
 
     // The actual tiling transformation takes tile sizes as attributes. It
@@ -384,7 +384,7 @@ test/Examples/transform/Ch1/invalidation-2.mlir:106:18: note: invalidated by thi
   %func, %call = transform.loop.outline %outline_target {func_name = "outlined"}
                  ^
 test/Examples/transform/Ch1/invalidation-2.mlir:24:13: note: ancestor payload op
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
             ^
 test/Examples/transform/Ch1/invalidation-2.mlir:24:13: note: nested payload op
   %matmul = linalg.matmul ins(%lhs, %rhs: tensor<512x512xf32>, tensor<512x512xf32>)
diff --git a/mlir/docs/Tutorials/transform/Ch2.md b/mlir/docs/Tutorials/transform/Ch2.md
index 6a6cefd8785aa..0f45f5607bab9 100644
--- a/mlir/docs/Tutorials/transform/Ch2.md
+++ b/mlir/docs/Tutorials/transform/Ch2.md
@@ -290,12 +290,12 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
     %add, %max = transform.split_handle %arg2
-        : (!transform.op<"linalg.elemwise_binary">)
+        : (!transform.op<"linalg.elementwise">)
         -> (!transform.any_op, !transform.any_op)
 
     // The actual tiling transformation takes tile sizes as attributes. It
diff --git a/mlir/docs/Tutorials/transform/Ch4.md b/mlir/docs/Tutorials/transform/Ch4.md
index ac60fcb914685..f9a0dff5c90e3 100644
--- a/mlir/docs/Tutorials/transform/Ch4.md
+++ b/mlir/docs/Tutorials/transform/Ch4.md
@@ -42,13 +42,13 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
             outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
   %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
     ins(%biased, %c0f : tensor<512x512xf32>, f32)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
@@ -59,7 +59,7 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
 
 In Chapter 1, we were calling the test transform interpreter pass with
 additional arguments, `bind-first-extra-to-ops=linalg.matmul
-bind-second-extra-to-ops=linalg.elemwise_binary`, to provide initial
+bind-second-extra-to-ops=linalg.elementwise`, to provide initial
 associations for operation handles. Instead, we can use match operations to
 discover relevant operations in the payload IR. Match operations can be combined
 with “regular” transform operations using, e.g., the
@@ -97,7 +97,7 @@ module @transforms attributes { transform.with_named_sequence } {
   // rewriter sequence on success.
   transform.named_sequence @match_elemwise(
       %entry: !transform.any_op {transform.readonly}) -> !transform.any_op {
-    transform.match.operation_name %entry ["linalg.elemwise_binary"]
+    transform.match.operation_name %entry ["linalg.elementwise"]
       : !transform.any_op
     transform.yield %entry : !transform.any_op
   }
@@ -127,7 +127,7 @@ module @transforms attributes { transform.with_named_sequence } {
 This script can be executed using the non-test interpreter pass running on the
 root operation of the translation unit without additional flags: `mlir-opt
 --transform-interpreter`. It will emit corresponding remarks at
-`linalg.elemwise_binary` and `linalg.matmul` operations. In debug builds, the
+`linalg.elementwise` and `linalg.matmul` operations. In debug builds, the
 infrastructure provides a convenient method to understand the matching process
 by passing `-debug-only=transform-matcher` to `mlir-opt` or a derived tool. It
 will print the silenceable failure messages produced by the match operations
@@ -169,7 +169,7 @@ transform.named_sequence @match_matmul_elemwise(
     %last: !transform.any_op {transform.readonly})
     -> (!transform.any_op, !transform.any_op, !transform.any_op) {
   // The last operation must be an elementwise binary.
-  transform.match.operation_name %last ["linalg.elemwise_binary"]
+  transform.match.operation_name %last ["linalg.elementwise"]
     : !transform.any_op
   // Its first operand must be defined by another operation, to which we
   // will get a handle here. We are guaranteed that the first operand exists
@@ -179,7 +179,7 @@ transform.named_sequence @match_matmul_elemwise(
   %middle = transform.get_producer_of_operand %last[0]
     : (!transform.any_op) -> !transform.any_op
   // The defining operation must itself be an elementwise binary.
-  transform.match.operation_name %middle ["linalg.elemwise_binary"]
+  transform.match.operation_name %middle ["linalg.elementwise"]
     : !transform.any_op
   // And the first operand of that operation must be defined by yet another
   // operation.
@@ -399,7 +399,7 @@ transform.named_sequence @match_matmul_elemwise(
     -> (!transform.any_op, !transform.any_op, !transform.any_op,
         !transform.param<i32>) {
   // The last operation must be an elementwise binary.
-  transform.match.operation_name %last ["linalg.elemwise_binary"]
+  transform.match.operation_name %last ["linalg.elementwise"]
     : !transform.any_op
 
   // One of its operands must be defined by another operation, to which we
@@ -413,7 +413,7 @@ transform.named_sequence @match_matmul_elemwise(
     %def = transform.get_defining_op %operand
       : (!transform.any_value) -> !transform.any_op
     // The defining operation must itself be an elementwise binary.
-    transform.match.operation_name %def ["linalg.elemwise_binary"]
+    transform.match.operation_name %def ["linalg.elementwise"]
       : !transform.any_op
     transform.yield %def : !transform.any_op
   }
diff --git a/mlir/docs/Tutorials/transform/ChH.md b/mlir/docs/Tutorials/transform/ChH.md
index aadfddb89393f..ce24c5add631c 100644
--- a/mlir/docs/Tutorials/transform/ChH.md
+++ b/mlir/docs/Tutorials/transform/ChH.md
@@ -290,7 +290,7 @@ scf.forall (%co) in (2) {
   scf.forall (%n, %y, %xo) in (5, 80, 20) {
     tensor.extract_slice
     // Implicit dimensions [ni=0:1, y=0:1, xi=0:5, ci=0:64]
-    %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> } // ...
+    %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed> // ...
     scf.forall.in_parallel {
       tensor.parallel_insert_slice // ...
     }
diff --git a/mlir/test/Examples/transform/Ch1/invalidation-1.mlir b/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
index 2264ade7f9b77..d8b8039631de9 100644
--- a/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
+++ b/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s \
 // RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
-// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
+// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elementwise},\
 // RUN:        canonicalize,cse,symbol-dce)" \
 // RUN:   --split-input-file --verify-diagnostics
 
@@ -16,7 +16,7 @@ module attributes {transform.with_named_sequence} {
       %arg0: !transform.any_op,
       // expected-note @below {{handle to invalidated ops}}
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
     // The actual tiling transformation takes tile sizes as attributes.
     // expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
     %tiled, %loop = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
@@ -39,14 +39,14 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -57,7 +57,7 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
     // We can cast one type to another as long as operations are compatible
     // with both types. This creates "aliasing" handles.
     // expected-note @below {{handle to invalidated ops}}
@@ -88,14 +88,14 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
diff --git a/mlir/test/Examples/transform/Ch1/invalidation-2.mlir b/mlir/test/Examples/transform/Ch1/invalidation-2.mlir
index 0a84a5cb6c68a..e09aca31a90da 100644
--- a/mlir/test/Examples/transform/Ch1/invalidation-2.mlir
+++ b/mlir/test/Examples/transform/Ch1/invalidation-2.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s \
 // RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
-// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
+// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elementwise},\
 // RUN:        canonicalize,cse,symbol-dce)" \
 // RUN:   --split-input-file --verify-diagnostics
 // ****************************** IMPORTANT NOTE ******************************
@@ -23,14 +23,14 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
   // Elementwise addition.
 
   // expected-note @below {{ancestor payload op}}
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -47,11 +47,11 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
   // Since the %arg2 handle is associated with both elementwise operations,
   // we need to split it into two handles so we can target only the second
   // elementwise operation.
-  %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">)
+  %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elementwise">)
       -> (!transform.any_op, !transform.any_op)
 
   // The actual tiling transformation takes tile sizes as attributes. It produces a
diff --git a/mlir/test/Examples/transform/Ch1/sequence.mlir b/mlir/test/Examples/transform/Ch1/sequence.mlir
index 3107adccf78fd..f8dfd2de01d97 100644
--- a/mlir/test/Examples/transform/Ch1/sequence.mlir
+++ b/mlir/test/Examples/transform/Ch1/sequence.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s \
 // RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
-// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
+// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elementwise},\
 // RUN:        canonicalize,cse,symbol-dce)" |\
 // RUN: FileCheck %s
 
@@ -20,21 +20,21 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
 
 // CHECK: func @outlined
 // CHECK:   linalg.matmul
-// CHECK:   linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+// CHECK:   linalg.elementwise kind=#linalg.elementwise_kind<add>
 
 // CHECK-LABEL: func @fc_relu
 // CHECK: scf.forall
@@ -46,9 +46,9 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
 // CHECK:     %[[SLICE8:.+]] = tensor.extract_slice
 // CHECK:     func.call @outlined(%[[SLICE4]], %[[SLICE5]], %[[SLICE6]], %[[SLICE7]], %[[SLICE8]])
 // CHECK-NOT: linalg.matmul
-// CHECK-NOT: linalg.elemwise_binary
+// CHECK-NOT: linalg.elementwise
 // CHECK:     scf.forall.in_parallel
-// CHECK:   linalg.elemwise_binary {fun = #linalg.binary_fn<max_signed>}
+// CHECK:   linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
 // CHECK:   scf.forall.in_parallel
 
 // Declaration of the "microkernel" function that we will be targeting.
@@ -63,11 +63,11 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
-    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">)
+    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elementwise">)
         -> (!transform.any_op, !transform.any_op)
   
     // The actual tiling transformation takes tile sizes as attributes. It produces a
diff --git a/mlir/test/Examples/transform/Ch2/sequence.mlir b/mlir/test/Examples/transform/Ch2/sequence.mlir
index 976df1d55503a..8d6e129c25f7d 100644
--- a/mlir/test/Examples/transform/Ch2/sequence.mlir
+++ b/mlir/test/Examples/transform/Ch2/sequence.mlir
@@ -1,6 +1,6 @@
 // RUN: transform-opt-ch2 %s \
 // RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
-// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
+// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elementwise},\
 // RUN:        canonicalize,cse,symbol-dce)" |\
 // RUN: FileCheck %s
 
@@ -20,14 +20,14 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -42,9 +42,9 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
 // CHECK:     %[[SLICE8:.+]] = tensor.extract_slice
 // CHECK:     func.call @microkernel(%[[SLICE4]], %[[SLICE5]], %[[SLICE6]], %[[SLICE7]], %[[SLICE8]])
 // CHECK-NOT: linalg.matmul
-// CHECK-NOT: linalg.elemwise_binary
+// CHECK-NOT: linalg.elementwise
 // CHECK:     scf.forall.in_parallel
-// CHECK:   linalg.elemwise_binary {fun = #linalg.binary_fn<max_signed>}
+// CHECK:   linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
 // CHECK:   scf.forall.in_parallel
 
 // Declaration of the "microkernel" function that we will be targeting.
@@ -59,11 +59,11 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
       %arg0: !transform.any_op,
       %arg1: !transform.op<"linalg.matmul">,
-      %arg2: !transform.op<"linalg.elemwise_binary">) {
+      %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
-    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">)
+    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elementwise">)
         -> (!transform.any_op, !transform.any_op)
 
     // The actual tiling transformation takes tile sizes as attributes. It produces a
diff --git a/mlir/test/Examples/transform/Ch3/sequence.mlir b/mlir/test/Examples/transform/Ch3/sequence.mlir
index 8dc33c3560c26..4d28518ca3d9e 100644
--- a/mlir/test/Examples/transform/Ch3/sequence.mlir
+++ b/mlir/test/Examples/transform/Ch3/sequence.mlir
@@ -1,6 +1,6 @@
 // RUN: transform-opt-ch3 %s \
 // RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
-// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
+// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elementwise},\
 // RUN:        canonicalize,cse,symbol-dce)" |\
 // RUN: FileCheck %s
 
@@ -20,14 +20,14 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                           outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise addition.
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -42,9 +42,9 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
 // CHECK:     %[[SLICE8:.+]] = tensor.extract_slice
 // CHECK:     func.call @microkernel(%[[SLICE4]], %[[SLICE5]], %[[SLICE6]], %[[SLICE7]], %[[SLICE8]])
 // CHECK-NOT: linalg.matmul
-// CHECK-NOT: linalg.elemwise_binary
+// CHECK-NOT: linalg.elementwise
 // CHECK:     scf.forall.in_parallel
-// CHECK:   linalg.elemwise_binary {fun = #linalg.binary_fn<max_signed>}
+// CHECK:   linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
 // CHECK:   scf.forall.in_parallel
 
 // Declaration of the "microkernel" function that we will be targeting.
@@ -59,11 +59,11 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(
        %arg0: !transform.any_op,
        %arg1: !transform.op<"linalg.matmul">,
-       %arg2: !transform.op<"linalg.elemwise_binary">) {
+       %arg2: !transform.op<"linalg.elementwise">) {
     // Since the %arg2 handle is associated with both elementwise operations,
     // we need to split it into two handles so we can target only the second
     // elementwise operation.
-    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">)
+    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elementwise">)
         -> (!transform.any_op, !transform.any_op)
   
     // The actual tiling transformation takes tile sizes as attributes. It produces a
diff --git a/mlir/test/Examples/transform/Ch4/multiple.mlir b/mlir/test/Examples/transform/Ch4/multiple.mlir
index 2c2c705905521..e6ab5ca65235d 100644
--- a/mlir/test/Examples/transform/Ch4/multiple.mlir
+++ b/mlir/test/Examples/transform/Ch4/multiple.mlir
@@ -12,15 +12,15 @@ func.func @fc_relu_operands_00(
 
   // Elementwise addition.
   // expected-remark @below {{add # 0}}
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
   // expected-remark @below {{max # 0}}
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -37,15 +37,15 @@ func.func @fc_relu_operands_01(
 
   // Elementwise addition.
   // expected-remark @below {{add # 1}}
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
   // expected-remark @below {{max # 1}}
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%c0f, %biased : f32, tensor<512x512xf32>)
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%c0f, %biased : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -97,7 +97,7 @@ module @transforms attributes { transform.with_named_sequence } {
       -> (!transform.any_op, !transform.any_op, !transform.any_op,
           !transform.param<i32>) {
     // The last operation must be an elementwise binary.
-    transform.match.operation_name %last ["linalg.elemwise_binary"]
+    transform.match.operation_name %last ["linalg.elementwise"]
       : !transform.any_op
 
     // One of its operands must be defined by another operation, to which we
@@ -111,7 +111,7 @@ module @transforms attributes { transform.with_named_sequence } {
       %def = transform.get_defining_op %operand 
         : (!transform.any_value) -> !transform.any_op
       // The defining operation must itself be an elementwise binary.
-      transform.match.operation_name %def ["linalg.elemwise_binary"]
+      transform.match.operation_name %def ["linalg.elementwise"]
         : !transform.any_op
       transform.yield %def : !transform.any_op
     }
diff --git a/mlir/test/Examples/transform/Ch4/sequence.mlir b/mlir/test/Examples/transform/Ch4/sequence.mlir
index 304533f706413..7dd725dfe6884 100644
--- a/mlir/test/Examples/transform/Ch4/sequence.mlir
+++ b/mlir/test/Examples/transform/Ch4/sequence.mlir
@@ -22,15 +22,15 @@ func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
 
   // Elementwise addition.
   // expected-remark @below {{elementwise binary}}
-  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
+  %biased = linalg.elementwise kind=#linalg.elementwise_kind<add>
     ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
 
   // Elementwise max with 0 (ReLU).
-  %c0f = arith.constant 0.0 : f32
+  %c0f = arith.constant dense<0.0> : tensor<512x512xf32>
   // expected-remark @below {{elementwise binary}}
-  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
-    ins(%biased, %c0f : tensor<512x512xf32>, f32)
+  %relued = linalg.elementwise kind=#linalg.elementwise_kind<max_signed>
+    ins(%biased, %c0f : tensor<512x512xf32>, tensor<512x512xf32>)
     outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
   func.return %relued : tensor<512x512xf32>
 }
@@ -82,7 +82,7 @@ module @transforms attributes { transform.with_named_sequence } {
   // rewriter sequence on success.
   transform.named_sequence @match_elemwise(
       %entry: !transform.any_op {transform.readonly}) -> !transform.any_op {
-    transform.match.operation_name %entry ["linalg.elemwise_binary"] 
+    transform.match.operation_name %entry ["linalg.elementwise"] 
       : !transform.any_op
     transform.yield %entry : !transform.any_op
   }
@@ -113,7 +113,7 @@ module @transforms attributes { transform.with_named_sequence } {
       %last: !transform.any_op {transform.readonly}) 
       -> (!transform.any_op, !transform.any_op, !transform.any_op) {
     // The last operation must be an elementwise binary.
-    transform.match.operation_name %last ["linalg.elemwise_binary"]
+    transform.match.operation_name %last ["linalg.elementwise"]
       : !transform.any_op
     // Its first operand must be defined by another operation, to which we
     // will get a handle here. We are guaranteed that the first operand exists
@@ -123,7 +123,7 @@ module @transforms attributes { transform.with_named_sequence } {
     %middle = transform.get_producer_of_operand %last[0]
       : (!transform.any_op) -> !transform.any_op
     // The defining operation must itself be an elementwise binary.
-    transform.match.operation_name %middle ["linalg.elemwise_binary"]
+    transform.match.operation_name %middle ["linalg.elementwise"]
       : !transform.any_op
     // And the first operand of that operation must be defined by yet another
     // operation.


