[Mlir-commits] [mlir] eeb09fd - [mlir][Linalg] Enable vectorization of 'mul', 'and', 'or' and 'xor' reductions
Diego Caballero
llvmlistbot at llvm.org
Tue Oct 12 14:10:21 PDT 2021
Author: Diego Caballero
Date: 2021-10-12T21:08:23Z
New Revision: eeb09fd6465e3ee4d564fb0ab948231baf1daadd
URL: https://github.com/llvm/llvm-project/commit/eeb09fd6465e3ee4d564fb0ab948231baf1daadd
DIFF: https://github.com/llvm/llvm-project/commit/eeb09fd6465e3ee4d564fb0ab948231baf1daadd.diff
LOG: [mlir][Linalg] Enable vectorization of 'mul', 'and', 'or' and 'xor' reductions
This patch adds support for vectorizing 'mul', 'and', 'or' and 'xor' reductions
to Linalg.
Reviewed By: pifon2a, ThomasRaoux, aartbik
Differential Revision: https://reviews.llvm.org/D111565
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
mlir/test/Dialect/Linalg/vectorization.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index cbe7e3f43067..a1ea54213cd1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -122,10 +122,14 @@ getKindForOp(Operation *reductionOp) {
return llvm::TypeSwitch<Operation *, llvm::Optional<vector::CombiningKind>>(
reductionOp)
.Case<AddIOp, AddFOp>([&](auto op) { return vector::CombiningKind::ADD; })
+ .Case<AndOp>([&](auto op) { return vector::CombiningKind::AND; })
.Case<MaxSIOp>([&](auto op) { return vector::CombiningKind::MAXSI; })
.Case<MaxFOp>([&](auto op) { return vector::CombiningKind::MAXF; })
.Case<MinSIOp>([&](auto op) { return vector::CombiningKind::MINSI; })
.Case<MinFOp>([&](auto op) { return vector::CombiningKind::MINF; })
+ .Case<MulIOp, MulFOp>([&](auto op) { return vector::CombiningKind::MUL; })
+ .Case<OrOp>([&](auto op) { return vector::CombiningKind::OR; })
+ .Case<XOrOp>([&](auto op) { return vector::CombiningKind::XOR; })
.Default([&](auto op) { return llvm::None; });
}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 1e6c8010c309..d3aa8c3a2953 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -822,9 +822,9 @@ func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: maxf {{.*}} : vector<4x4xf32>
// CHECK: vector.multi_reduction #vector.kind<maxf>, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
- %minf32 = constant -3.40282e+38 : f32
+ %ident = constant -3.40282e+38 : f32
%init = linalg.init_tensor [4] : tensor<4xf32>
- %fill = linalg.fill(%minf32, %init) : f32, tensor<4xf32> -> tensor<4xf32>
+ %fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -863,6 +863,106 @@ func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// -----
+// CHECK-LABEL: func @red_mul_2d(
+func @red_mul_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
+ // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4xf32>, vector<4x4xf32>
+ // CHECK: mulf {{.*}} : vector<4x4xf32>
+ // CHECK: vector.multi_reduction #vector.kind<mul>, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
+ %ident = constant 1.0 : f32
+ %init = linalg.init_tensor [4] : tensor<4xf32>
+ %fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+ ^bb0(%in0: f32, %out0: f32): // no predecessors
+ %mul = mulf %in0, %out0 : f32
+ linalg.yield %mul : f32
+ } -> tensor<4xf32>
+ return %red : tensor<4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @red_or_2d(
+func @red_or_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+ // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+ // CHECK: or {{.*}} : vector<4x4xi1>
+ // CHECK: vector.multi_reduction #vector.kind<or>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ %ident = constant false
+ %init = linalg.init_tensor [4] : tensor<4xi1>
+ %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+ ^bb0(%in0: i1, %out0: i1): // no predecessors
+ %or = or %in0, %out0 : i1
+ linalg.yield %or : i1
+ } -> tensor<4xi1>
+ return %red : tensor<4xi1>
+}
+
+// -----
+
+// CHECK-LABEL: func @red_and_2d(
+func @red_and_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+ // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+ // CHECK: and {{.*}} : vector<4x4xi1>
+ // CHECK: vector.multi_reduction #vector.kind<and>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ %ident = constant true
+ %init = linalg.init_tensor [4] : tensor<4xi1>
+ %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+ ^bb0(%in0: i1, %out0: i1): // no predecessors
+ %and = and %in0, %out0 : i1
+ linalg.yield %and : i1
+ } -> tensor<4xi1>
+ return %red : tensor<4xi1>
+}
+
+// -----
+
+// CHECK-LABEL: func @red_xor_2d(
+func @red_xor_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+ // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+ // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+ // CHECK: xor {{.*}} : vector<4x4xi1>
+ // CHECK: vector.multi_reduction #vector.kind<xor>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+ %ident = constant false
+ %init = linalg.init_tensor [4] : tensor<4xi1>
+ %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+ %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+ ^bb0(%in0: i1, %out0: i1): // no predecessors
+ %xor = xor %in0, %out0 : i1
+ linalg.yield %xor : i1
+ } -> tensor<4xi1>
+ return %red : tensor<4xi1>
+}
+
+// -----
+
// CHECK-DAG: #[[$M5:.*]] = affine_map<(d0, d1) -> (d0, 0)>
// CHECK-LABEL: func @explicit_broadcast(
More information about the Mlir-commits
mailing list