[Mlir-commits] [mlir] b9ae741 - [mlir] Fix getVectorReductionOp
Amy Zhuang
llvmlistbot at llvm.org
Tue Oct 26 09:14:42 PDT 2021
Author: Amy Zhuang
Date: 2021-10-26T08:42:34-07:00
New Revision: b9ae741d3e809ec38a4131f5d1c48131be3af256
URL: https://github.com/llvm/llvm-project/commit/b9ae741d3e809ec38a4131f5d1c48131be3af256
DIFF: https://github.com/llvm/llvm-project/commit/b9ae741d3e809ec38a4131f5d1c48131be3af256.diff
LOG: [mlir] Fix getVectorReductionOp
1. The min/max combining kinds of the Vector reduction op have been changed to
minf/maxf, minsi/maxsi, and minui/maxui. Modify getVectorReductionOp
accordingly.
2. Add min/max to the supported reductions.
Reviewed By: dcaballe, nicolasvasilache
Differential Revision: https://reviews.llvm.org/D112246
Added:
Modified:
mlir/lib/Analysis/AffineAnalysis.cpp
mlir/lib/Dialect/StandardOps/IR/Ops.cpp
mlir/lib/Dialect/Vector/VectorOps.cpp
mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index fcc18508341e3..8ba6fe216dd14 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -58,6 +58,12 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
.Case([](arith::MulFOp) { return AtomicRMWKind::mulf; })
.Case([](arith::AddIOp) { return AtomicRMWKind::addi; })
.Case([](arith::MulIOp) { return AtomicRMWKind::muli; })
+ .Case([](MinFOp) { return AtomicRMWKind::minf; })
+ .Case([](MaxFOp) { return AtomicRMWKind::maxf; })
+ .Case([](MinSIOp) { return AtomicRMWKind::mins; })
+ .Case([](MaxSIOp) { return AtomicRMWKind::maxs; })
+ .Case([](MinUIOp) { return AtomicRMWKind::minu; })
+ .Case([](MaxUIOp) { return AtomicRMWKind::maxu; })
.Default([](Operation *) -> Optional<AtomicRMWKind> {
// TODO: AtomicRMW supports other kinds of reductions this is
// currently not detecting, add those when the need arises.
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index 9b6052bcc2902..91470be5e7a5c 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -251,35 +251,17 @@ Value mlir::getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc,
case AtomicRMWKind::muli:
return builder.create<arith::MulIOp>(loc, lhs, rhs);
case AtomicRMWKind::maxf:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT, lhs, rhs),
- lhs, rhs);
+ return builder.create<MaxFOp>(loc, lhs, rhs);
case AtomicRMWKind::minf:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLT, lhs, rhs),
- lhs, rhs);
+ return builder.create<MinFOp>(loc, lhs, rhs);
case AtomicRMWKind::maxs:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, lhs, rhs),
- lhs, rhs);
+ return builder.create<MaxSIOp>(loc, lhs, rhs);
case AtomicRMWKind::mins:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, lhs, rhs),
- lhs, rhs);
+ return builder.create<MinSIOp>(loc, lhs, rhs);
case AtomicRMWKind::maxu:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ugt, lhs, rhs),
- lhs, rhs);
+ return builder.create<MaxUIOp>(loc, lhs, rhs);
case AtomicRMWKind::minu:
- return builder.create<SelectOp>(
- loc,
- builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult, lhs, rhs),
- lhs, rhs);
+ return builder.create<MinUIOp>(loc, lhs, rhs);
// TODO: Add remaining reduction operations.
default:
(void)emitOptionalError(loc, "Reduction operation type not supported");
diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp
index e2268569d5d43..149662b124e78 100644
--- a/mlir/lib/Dialect/Vector/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/VectorOps.cpp
@@ -371,16 +371,28 @@ Value mlir::vector::getVectorReductionOp(AtomicRMWKind op, OpBuilder &builder,
builder.getStringAttr("mul"),
vector, ValueRange{});
case AtomicRMWKind::minf:
+ return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
+ builder.getStringAttr("minf"),
+ vector, ValueRange{});
case AtomicRMWKind::mins:
+ return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
+ builder.getStringAttr("minsi"),
+ vector, ValueRange{});
case AtomicRMWKind::minu:
return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
- builder.getStringAttr("min"),
+ builder.getStringAttr("minui"),
vector, ValueRange{});
case AtomicRMWKind::maxf:
+ return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
+ builder.getStringAttr("maxf"),
+ vector, ValueRange{});
case AtomicRMWKind::maxs:
+ return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
+ builder.getStringAttr("maxsi"),
+ vector, ValueRange{});
case AtomicRMWKind::maxu:
return builder.create<vector::ReductionOp>(vector.getLoc(), scalarType,
- builder.getStringAttr("max"),
+ builder.getStringAttr("maxui"),
vector, ValueRange{});
// TODO: Add remaining reduction operations.
default:
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
index 0a36fbc0ab558..f82640e5daa39 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
@@ -29,6 +29,168 @@ func @vecdim_reduction(%in: memref<256x512xf32>, %out: memref<256xf32>) {
// -----
+func @vecdim_reduction_minf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0x7F800000 : f32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+ %min = minf %red_iter, %ld : f32
+ affine.yield %min : f32
+ }
+ affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_minf
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmax:.*]] = arith.constant dense<0x7F800000> : vector<128xf32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmax]]) -> (vector<128xf32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
+// CHECK: %[[min:.*]] = minf %[[red_iter]], %[[ld]] : vector<128xf32>
+// CHECK: affine.yield %[[min]] : vector<128xf32>
+// CHECK: }
+// CHECK: %[[final_min:.*]] = vector.reduction "minf", %[[vred:.*]] : vector<128xf32> into f32
+// CHECK: affine.store %[[final_min]], %{{.*}} : memref<256xf32>
+// CHECK: }
+
+// -----
+
+func @vecdim_reduction_maxf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0xFF800000 : f32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+ %max = maxf %red_iter, %ld : f32
+ affine.yield %max : f32
+ }
+ affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_maxf
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmin:.*]] = arith.constant dense<0xFF800000> : vector<128xf32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmin]]) -> (vector<128xf32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
+// CHECK: %[[max:.*]] = maxf %[[red_iter]], %[[ld]] : vector<128xf32>
+// CHECK: affine.yield %[[max]] : vector<128xf32>
+// CHECK: }
+// CHECK: %[[final_max:.*]] = vector.reduction "maxf", %[[vred:.*]] : vector<128xf32> into f32
+// CHECK: affine.store %[[final_max]], %{{.*}} : memref<256xf32>
+// CHECK: }
+
+// -----
+
+func @vecdim_reduction_minsi(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant 2147483647 : i32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+ %min = minsi %red_iter, %ld : i32
+ affine.yield %min : i32
+ }
+ affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_minsi
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmax:.*]] = arith.constant dense<2147483647> : vector<128xi32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmax]]) -> (vector<128xi32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
+// CHECK: %[[min:.*]] = minsi %[[red_iter]], %[[ld]] : vector<128xi32>
+// CHECK: affine.yield %[[min]] : vector<128xi32>
+// CHECK: }
+// CHECK: %[[final_min:.*]] = vector.reduction "minsi", %[[vred:.*]] : vector<128xi32> into i32
+// CHECK: affine.store %[[final_min]], %{{.*}} : memref<256xi32>
+// CHECK: }
+
+// -----
+
+func @vecdim_reduction_maxsi(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant -2147483648 : i32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+ %max = maxsi %red_iter, %ld : i32
+ affine.yield %max : i32
+ }
+ affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_maxsi
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmin:.*]] = arith.constant dense<-2147483648> : vector<128xi32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmin]]) -> (vector<128xi32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
+// CHECK: %[[max:.*]] = maxsi %[[red_iter]], %[[ld]] : vector<128xi32>
+// CHECK: affine.yield %[[max]] : vector<128xi32>
+// CHECK: }
+// CHECK: %[[final_max:.*]] = vector.reduction "maxsi", %[[vred:.*]] : vector<128xi32> into i32
+// CHECK: affine.store %[[final_max]], %{{.*}} : memref<256xi32>
+// CHECK: }
+
+// -----
+
+func @vecdim_reduction_minui(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant -1 : i32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+ %min = minui %red_iter, %ld : i32
+ affine.yield %min : i32
+ }
+ affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_minui
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmax:.*]] = arith.constant dense<-1> : vector<128xi32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmax]]) -> (vector<128xi32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
+// CHECK: %[[min:.*]] = minui %[[red_iter]], %[[ld]] : vector<128xi32>
+// CHECK: affine.yield %[[min]] : vector<128xi32>
+// CHECK: }
+// CHECK: %[[final_min:.*]] = vector.reduction "minui", %[[vred:.*]] : vector<128xi32> into i32
+// CHECK: affine.store %[[final_min]], %{{.*}} : memref<256xi32>
+// CHECK: }
+
+// -----
+
+func @vecdim_reduction_maxui(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant 0 : i32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+ %max = maxui %red_iter, %ld : i32
+ affine.yield %max : i32
+ }
+ affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL: @vecdim_reduction_maxui
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vmin:.*]] = arith.constant dense<0> : vector<128xi32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmin]]) -> (vector<128xi32>) {
+// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
+// CHECK: %[[max:.*]] = maxui %[[red_iter]], %[[ld]] : vector<128xi32>
+// CHECK: affine.yield %[[max]] : vector<128xi32>
+// CHECK: }
+// CHECK: %[[final_max:.*]] = vector.reduction "maxui", %[[vred:.*]] : vector<128xi32> into i32
+// CHECK: affine.store %[[final_max]], %{{.*}} : memref<256xi32>
+// CHECK: }
+
+// -----
+
// The inner reduction loop '%j' is vectorized. (The order of addf's operands is
// different than in the previous test case).
More information about the Mlir-commits
mailing list