[Mlir-commits] [mlir] 84a880e - [mlir][SCF] NFC - Drop SCF EDSC usage
Nicolas Vasilache
llvmlistbot at llvm.org
Wed May 19 08:52:42 PDT 2021
Author: Nicolas Vasilache
Date: 2021-05-19T15:52:14Z
New Revision: 84a880e1e23ebc2ca60e6e1f9e8d0d8db3f9a036
URL: https://github.com/llvm/llvm-project/commit/84a880e1e23ebc2ca60e6e1f9e8d0d8db3f9a036
DIFF: https://github.com/llvm/llvm-project/commit/84a880e1e23ebc2ca60e6e1f9e8d0d8db3f9a036.diff
LOG: [mlir][SCF] NFC - Drop SCF EDSC usage
Drop the SCF dialect EDSC subdirectory and update all uses.
Differential Revision: https://reviews.llvm.org/D102780
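For context, a minimal, hypothetical sketch (not part of this commit) of the migration pattern the change applies throughout: the EDSC loopNestBuilder / MemRefIndexedValue idiom, which relied on an implicit ScopedContext, is rewritten against scf::buildLoopNest with an explicit OpBuilder and Location and explicit memref load/store creation. The function name `copyNest` and the values `from`, `to`, `lbs`, `ubs`, `steps` are illustrative only.

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"

using namespace mlir;

// Before (EDSC style, removed by this commit):
//   loopNestBuilder(lbs, ubs, steps, [&](ValueRange ivs) {
//     MemRefIndexedValue fromHandle(from), toHandle(to);
//     toHandle(ivs) = fromHandle(ivs);  // index notation emitted load + store
//   });
//
// After: the builder and location are threaded explicitly and the
// load/store ops are created directly on the OpBuilder.
static void copyNest(OpBuilder &b, Location loc, ValueRange lbs,
                     ValueRange ubs, ValueRange steps, Value from, Value to) {
  scf::buildLoopNest(b, loc, lbs, ubs, steps,
                     [&](OpBuilder &b, Location loc, ValueRange ivs) {
                       Value v = b.create<memref::LoadOp>(loc, from, ivs);
                       b.create<memref::StoreOp>(loc, v, to, ivs);
                     });
}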
Added:
Modified:
mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
mlir/include/mlir/EDSC/Builders.h
mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/lib/Dialect/SCF/CMakeLists.txt
mlir/test/Dialect/Linalg/affine.mlir
mlir/test/Dialect/Linalg/loops.mlir
Removed:
mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h
mlir/lib/Dialect/SCF/EDSC/Builders.cpp
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
index d99f29f3b5ba..40b814fd95a8 100644
--- a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
@@ -89,132 +89,6 @@ Value uge(Value lhs, Value rhs);
} // namespace op
-/// Arithmetic operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator+(Value e) {
- using op::operator+;
- return static_cast<Value>(*this) + e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator-(Value e) {
- using op::operator-;
- return static_cast<Value>(*this) - e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator*(Value e) {
- using op::operator*;
- return static_cast<Value>(*this) * e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator/(Value e) {
- using op::operator/;
- return static_cast<Value>(*this) / e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator%(Value e) {
- using op::operator%;
- return static_cast<Value>(*this) % e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator^(Value e) {
- using op::operator^;
- return static_cast<Value>(*this) ^ e;
-}
-
-/// Assignment-arithmetic operator overloadings.
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) {
- using op::operator+;
- return Store(*this + e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) {
- using op::operator-;
- return Store(*this - e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) {
- using op::operator*;
- return Store(*this * e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) {
- using op::operator/;
- return Store(*this / e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) {
- using op::operator%;
- return Store(*this % e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) {
- using op::operator^;
- return Store(*this ^ e, getBase(), indices);
-}
-
-/// Logical operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator&&(Value e) {
- using op::operator&&;
- return static_cast<Value>(*this) && e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator||(Value e) {
- using op::operator||;
- return static_cast<Value>(*this) || e;
-}
-
-/// Comparison operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::eq(Value e) {
- return eq(value, e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ne(Value e) {
- return ne(value, e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::slt(Value e) {
- using op::slt;
- return slt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sle(Value e) {
- using op::sle;
- return sle(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sgt(Value e) {
- using op::sgt;
- return sgt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sge(Value e) {
- using op::sge;
- return sge(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ult(Value e) {
- using op::ult;
- return ult(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ule(Value e) {
- using op::ule;
- return ule(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ugt(Value e) {
- using op::ugt;
- return ugt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::uge(Value e) {
- using op::uge;
- return uge(static_cast<Value>(*this), e);
-}
-
} // namespace edsc
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
index 04595b7fd862..ffb420b31125 100644
--- a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
@@ -21,9 +21,6 @@ using affine_min = ValueBuilder<AffineMinOp>;
using affine_max = ValueBuilder<AffineMaxOp>;
using affine_store = OperationBuilder<AffineStoreOp>;
-/// Provide an index notation around affine_load and affine_store.
-using AffineIndexedValue = TemplatedIndexedValue<affine_load, affine_store>;
-
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index b0dd38e75486..7f844fef9c56 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -244,19 +244,15 @@ struct RegionMatcher {
/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
-/// of loop induction variables.
+/// of loop induction variables and a range of iterArgs.
template <typename LoopTy>
struct GenerateLoopNest {
- using IndexedValueTy =
- typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
- edsc::intrinsics::AffineIndexedValue,
- edsc::intrinsics::MemRefIndexedValue>::type;
-
- static void
- doit(ArrayRef<Range> loopRanges, LinalgOp linalgOp,
- ArrayRef<Attribute> iteratorTypes,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
- Optional<LinalgLoopDistributionOptions> = None);
+ static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
+ LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
+ function_ref<scf::ValueVector(OpBuilder &, Location,
+ ValueRange, ValueRange)>
+ bodyBuilderFn,
+ Optional<LinalgLoopDistributionOptions> = None);
};
} // namespace linalg
diff --git a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
index 388115c20df9..cd121eb691bb 100644
--- a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
@@ -31,9 +31,6 @@ using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
using memref_view = ValueBuilder<memref::ViewOp>;
-/// Provide an index notation around memref_load and memref_store.
-using MemRefIndexedValue =
- TemplatedIndexedValue<intrinsics::memref_load, intrinsics::memref_store>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
deleted file mode 100644
index 8622d8c98315..000000000000
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- Builders.h - MLIR Declarative Builder Classes ------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Provides intuitive composable interfaces for building structured MLIR
-// snippets in a declarative fashion.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
-#define MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
-
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/EDSC/Builders.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Types.h"
-
-namespace mlir {
-namespace edsc {
-
-/// Adapters for building loop nests using the builder and the location stored
-/// in ScopedContext. Actual builders are in scf::buildLoopNest.
-scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
- ValueRange steps,
- function_ref<void(ValueRange)> fun = nullptr);
-scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
- function_ref<void(Value)> fun = nullptr);
-scf::LoopNest loopNestBuilder(
- Value lb, Value ub, Value step, ValueRange iterArgInitValues,
- function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
-scf::LoopNest loopNestBuilder(
- ValueRange lbs, ValueRange ubs, ValueRange steps,
- ValueRange iterArgInitValues,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);
-
-/// Adapters for building if conditions using the builder and the location
-/// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted
-/// if the condition should not have an 'else' part.
-/// When `ifOp` is specified, the scf::IfOp is captured. This is particularly
-/// convenient for 0-result conditions.
-ValueRange conditionBuilder(TypeRange results, Value condition,
- function_ref<scf::ValueVector()> thenBody,
- function_ref<scf::ValueVector()> elseBody = nullptr,
- scf::IfOp *ifOp = nullptr);
-ValueRange conditionBuilder(Value condition, function_ref<void()> thenBody,
- function_ref<void()> elseBody = nullptr,
- scf::IfOp *ifOp = nullptr);
-
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h
deleted file mode 100644
index fcc2971963c6..000000000000
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- Intrinsics.h - MLIR EDSC Intrinsics for SCF --------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM
-// Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
-#define MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-
-namespace mlir {
-namespace edsc {
-namespace intrinsics {
-
-using loop_yield = OperationBuilder<scf::YieldOp>;
-
-} // namespace intrinsics
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h
index f904f2a53eb0..b6ef3c804e55 100644
--- a/mlir/include/mlir/EDSC/Builders.h
+++ b/mlir/include/mlir/EDSC/Builders.h
@@ -169,172 +169,6 @@ struct StructuredIndexed {
SmallVector<AffineExpr, 4> exprs;
};
-/// A TemplatedIndexedValue brings an index notation over the template Load and
-/// Store parameters. Assigning to an IndexedValue emits an actual `Store`
-/// operation, while converting an IndexedValue to a Value emits an actual
-/// `Load` operation.
-template <typename Load, typename Store>
-class TemplatedIndexedValue {
-public:
- explicit TemplatedIndexedValue(Value v) : value(v) {}
-
- TemplatedIndexedValue(const TemplatedIndexedValue &rhs) = default;
-
- TemplatedIndexedValue operator()() { return *this; }
- /// Returns a new `TemplatedIndexedValue`.
- TemplatedIndexedValue operator()(Value index) {
- TemplatedIndexedValue res(value);
- res.indices.push_back(index);
- return res;
- }
- template <typename... Args>
- TemplatedIndexedValue operator()(Value index, Args... indices) {
- return TemplatedIndexedValue(value, index).append(indices...);
- }
- TemplatedIndexedValue operator()(ValueRange indices) {
- return TemplatedIndexedValue(value, indices);
- }
-
- /// Emits a `store`.
- Store operator=(const TemplatedIndexedValue &rhs) {
- return Store(rhs, value, indices);
- }
- Store operator=(Value rhs) { return Store(rhs, value, indices); }
-
- /// Emits a `load` when converting to a Value.
- operator Value() const { return Load(value, indices); }
-
- /// Returns the base memref.
- Value getBase() const { return value; }
-
- /// Returns the underlying memref.
- MemRefType getMemRefType() const {
- return value.getType().template cast<MemRefType>();
- }
-
- /// Returns the underlying MemRef elemental type cast as `T`.
- template <typename T>
- T getElementalTypeAs() const {
- return value.getType()
- .template cast<MemRefType>()
- .getElementType()
- .template cast<T>();
- }
-
- /// Arithmetic operator overloadings.
- Value operator+(Value e);
- Value operator-(Value e);
- Value operator*(Value e);
- Value operator/(Value e);
- Value operator%(Value e);
- Value operator^(Value e);
- Value operator+(TemplatedIndexedValue e) {
- return *this + static_cast<Value>(e);
- }
- Value operator-(TemplatedIndexedValue e) {
- return *this - static_cast<Value>(e);
- }
- Value operator*(TemplatedIndexedValue e) {
- return *this * static_cast<Value>(e);
- }
- Value operator/(TemplatedIndexedValue e) {
- return *this / static_cast<Value>(e);
- }
- Value operator%(TemplatedIndexedValue e) {
- return *this % static_cast<Value>(e);
- }
- Value operator^(TemplatedIndexedValue e) {
- return *this ^ static_cast<Value>(e);
- }
-
- /// Assignment-arithmetic operator overloadings.
- Store operator+=(Value e);
- Store operator-=(Value e);
- Store operator*=(Value e);
- Store operator/=(Value e);
- Store operator%=(Value e);
- Store operator^=(Value e);
- Store operator+=(TemplatedIndexedValue e) {
- return this->operator+=(static_cast<Value>(e));
- }
- Store operator-=(TemplatedIndexedValue e) {
- return this->operator-=(static_cast<Value>(e));
- }
- Store operator*=(TemplatedIndexedValue e) {
- return this->operator*=(static_cast<Value>(e));
- }
- Store operator/=(TemplatedIndexedValue e) {
- return this->operator/=(static_cast<Value>(e));
- }
- Store operator%=(TemplatedIndexedValue e) {
- return this->operator%=(static_cast<Value>(e));
- }
- Store operator^=(TemplatedIndexedValue e) {
- return this->operator^=(static_cast<Value>(e));
- }
-
- /// Logical operator overloadings.
- Value operator&&(Value e);
- Value operator||(Value e);
- Value operator&&(TemplatedIndexedValue e) {
- return *this && static_cast<Value>(e);
- }
- Value operator||(TemplatedIndexedValue e) {
- return *this || static_cast<Value>(e);
- }
-
- /// Comparison operator overloadings.
- Value eq(Value e);
- Value ne(Value e);
- Value slt(Value e);
- Value sle(Value e);
- Value sgt(Value e);
- Value sge(Value e);
- Value ult(Value e);
- Value ule(Value e);
- Value ugt(Value e);
- Value uge(Value e);
- Value slt(TemplatedIndexedValue e) {
- return slt(*this, static_cast<Value>(e));
- }
- Value sle(TemplatedIndexedValue e) {
- return sle(*this, static_cast<Value>(e));
- }
- Value sgt(TemplatedIndexedValue e) {
- return sgt(*this, static_cast<Value>(e));
- }
- Value sge(TemplatedIndexedValue e) {
- return sge(*this, static_cast<Value>(e));
- }
- Value ult(TemplatedIndexedValue e) {
- return ult(*this, static_cast<Value>(e));
- }
- Value ule(TemplatedIndexedValue e) {
- return ule(*this, static_cast<Value>(e));
- }
- Value ugt(TemplatedIndexedValue e) {
- return ugt(*this, static_cast<Value>(e));
- }
- Value uge(TemplatedIndexedValue e) {
- return uge(*this, static_cast<Value>(e));
- }
-
-private:
- TemplatedIndexedValue(Value value, ValueRange indices)
- : value(value), indices(indices.begin(), indices.end()) {}
-
- TemplatedIndexedValue &append() { return *this; }
-
- template <typename T, typename... Args>
- TemplatedIndexedValue &append(T index, Args... indices) {
- this->indices.push_back(static_cast<Value>(index));
- append(indices...);
- return *this;
- }
- Value value;
- SmallVector<Value, 8> indices;
-};
-
} // namespace edsc
} // namespace mlir
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index cc50865d0c6b..f092a4f9b34e 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -14,7 +14,7 @@
#include "mlir/Dialect/GPU/MemoryPromotion.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
+#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@@ -41,7 +41,7 @@ static StringRef getDimName(unsigned dim) {
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
/// reverse order to enable access coalescing in the innermost loop.
-static void insertCopyLoops(OpBuilder &builder, Location loc,
+static void insertCopyLoops(OpBuilder &b, Location loc,
MemRefBoundsCapture &bounds, Value from, Value to) {
// Create EDSC handles for bounds.
unsigned rank = bounds.rank();
@@ -68,24 +68,24 @@ static void insertCopyLoops(OpBuilder &builder, Location loc,
[](int64_t step) { return std_constant_index(step); });
// Obtain thread identifiers and block sizes, necessary to map to them.
- auto indexType = builder.getIndexType();
+ auto indexType = b.getIndexType();
SmallVector<Value, 3> threadIds, blockDims;
for (unsigned i = 0; i < 3; ++i) {
- auto dimName = builder.getStringAttr(getDimName(i));
- threadIds.push_back(
- builder.create<gpu::ThreadIdOp>(loc, indexType, dimName));
- blockDims.push_back(
- builder.create<gpu::BlockDimOp>(loc, indexType, dimName));
+ auto dimName = b.getStringAttr(getDimName(i));
+ threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
+ blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
}
// Produce the loop nest with copies.
SmallVector<Value, 8> ivs(lbs.size());
- loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
- ivs.assign(loopIvs.begin(), loopIvs.end());
- auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
- MemRefIndexedValue fromHandle(from), toHandle(to);
- toHandle(activeIvs) = fromHandle(activeIvs);
- });
+ mlir::scf::buildLoopNest(
+ b, loc, lbs, ubs, steps,
+ [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
+ ivs.assign(loopIvs.begin(), loopIvs.end());
+ auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
+ Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
+ b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
+ });
// Map the innermost loops to threads in reverse order.
for (auto en :
@@ -142,17 +142,17 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
assert(llvm::hasSingleElement(region) &&
"unstructured control flow not supported");
- OpBuilder builder(region.getContext());
- builder.setInsertionPointToStart(&region.front());
+ OpBuilder b(region.getContext());
+ b.setInsertionPointToStart(&region.front());
- ScopedContext edscContext(builder, loc);
+ ScopedContext edscContext(b, loc);
MemRefBoundsCapture fromBoundsCapture(from);
- insertCopyLoops(builder, loc, fromBoundsCapture, from, to);
- builder.create<gpu::BarrierOp>(loc);
+ insertCopyLoops(b, loc, fromBoundsCapture, from, to);
+ b.create<gpu::BarrierOp>(loc);
- builder.setInsertionPoint(&region.front().back());
- builder.create<gpu::BarrierOp>(loc);
- insertCopyLoops(builder, loc, fromBoundsCapture, to, from);
+ b.setInsertionPoint(&region.front().back());
+ b.create<gpu::BarrierOp>(loc);
+ insertCopyLoops(b, loc, fromBoundsCapture, to, from);
}
/// Promotes a function argument to workgroup memory in the given function. The
diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
index 46e42e26c5c8..45017faf38c6 100644
--- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
@@ -11,7 +11,6 @@
#include "mlir/Dialect/Linalg/EDSC/Builders.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Math/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExpr.h"
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
index 317a9864516a..c99084762a56 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -7,16 +7,11 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
-#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
-#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
@@ -27,38 +22,67 @@
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
-using namespace mlir::edsc;
-using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;
-using edsc::op::operator+;
+namespace {
+/// Helper struct to build simple arithmetic quantities with minimal type
+/// inference support.
+struct ArithBuilder {
+ ArithBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}
+
+ Value select(Value cmp, Value lhs, Value rhs) {
+ return b.create<SelectOp>(loc, cmp, lhs, rhs);
+ }
+ Value slt(Value lhs, Value rhs) {
+ if (lhs.getType().isa<IntegerType>())
+ return b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);
+ return b.create<CmpFOp>(loc, CmpFPredicate::OLT, lhs, rhs);
+ }
+ Value sgt(Value lhs, Value rhs) {
+ if (lhs.getType().isa<IntegerType>())
+ return b.create<CmpIOp>(loc, CmpIPredicate::sgt, lhs, rhs);
+ return b.create<CmpFOp>(loc, CmpFPredicate::OGT, lhs, rhs);
+ }
+ Value add(Value lhs, Value rhs) {
+ if (lhs.getType().isa<IntegerType>())
+ return b.create<AddIOp>(loc, lhs, rhs);
+ return b.create<AddFOp>(loc, lhs, rhs);
+ }
+ Value mul(Value lhs, Value rhs) {
+ if (lhs.getType().isa<IntegerType>())
+ return b.create<MulIOp>(loc, lhs, rhs);
+ return b.create<MulFOp>(loc, lhs, rhs);
+ }
+
+ OpBuilder &b;
+ Location loc;
+};
+} // namespace
-static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
- Location loc,
- AffineMap map,
- ArrayRef<Value> vals) {
+static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
+ AffineMap map,
+ ArrayRef<Value> vals) {
if (map.isEmpty())
return {};
assert(map.getNumInputs() == vals.size());
- SmallVector<Value, 8> res;
+ SmallVector<Value> res;
res.reserve(map.getNumResults());
auto dims = map.getNumDims();
for (auto e : map.getResults()) {
auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
- SmallVector<Value, 4> operands(vals.begin(), vals.end());
+ SmallVector<Value> operands(vals.begin(), vals.end());
canonicalizeMapAndOperands(&exprMap, &operands);
- res.push_back(affine_apply(exprMap, operands));
+ res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
}
return res;
}
-template <typename IndexedValueType, typename OpType>
-static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
- ArrayRef<SmallVector<Value, 8>> indexing,
+template <typename LoadOpTy, typename StoreOpTy, typename OpType>
+static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
+ ArrayRef<Value> indexedValues,
+ ArrayRef<SmallVector<Value>> indexing,
ArrayRef<Value> outputBuffers) {
- assert(op->getNumRegions() == 1 && "Expected single region op");
- auto &b = ScopedContext::getBuilderRef();
auto &block = op->getRegion(0).front();
BlockAndValueMapping map;
map.map(block.getArguments(), indexedValues);
@@ -67,26 +91,24 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
map.map(op.getResults(), newOp->getResults());
}
- Operation &terminator = block.back();
- assert(isa<linalg::YieldOp>(terminator) &&
- "expected a yield op in the end of the region");
- for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
- IndexedValueType O(outputBuffers[i]);
- O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
+ Operation *terminator = block.getTerminator();
+ for (OpOperand &operand : terminator->getOpOperands()) {
+ Value toStore = map.lookupOrDefault(operand.get());
+ b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
+ indexing[operand.getOperandNumber()]);
}
}
// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {
- SmallVector<Value, 8> inputs;
- SmallVector<Value, 8> outputs;
+ SmallVector<Value> inputs;
+ SmallVector<Value> outputs;
};
template <typename SingleInputPoolingOp>
-static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
- SingleInputPoolingOp op) {
- auto &b = ScopedContext::getBuilderRef();
- auto loc = ScopedContext::getLocation();
+static InputAndOutputIndices
+getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
+ SingleInputPoolingOp op) {
auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
@@ -125,19 +147,18 @@ static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
/// }
/// }
/// ```
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs,
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs,
LinalgOp linalgOp) {
assert(linalgOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
- auto &b = ScopedContext::getBuilderRef();
- auto loc = ScopedContext::getLocation();
unsigned nInputs = linalgOp.getNumInputs();
unsigned nOutputs = linalgOp.getNumOutputs();
- SmallVector<Value, 4> indexedValues;
+ SmallVector<Value> indexedValues;
indexedValues.reserve(nInputs + nOutputs);
- auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
+ auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());
// TODO: Avoid the loads if the corresponding argument of the
// region has no uses.
@@ -145,46 +166,40 @@ static void emitScalarImplementation(ArrayRef<Value> allIvs,
for (unsigned i = 0; i < nInputs; ++i) {
auto indexing = makeCanonicalAffineApplies(
b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
- // Passing through IndexedValueType emits the proper load operation.
- indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
+ indexedValues.push_back(
+ b.create<LoadOpTy>(loc, linalgOp.getInput(i), indexing));
}
// 1.b. Emit load from output views.
for (unsigned i = 0; i < nOutputs; ++i) {
auto indexing = makeCanonicalAffineApplies(
b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
- // Passing through IndexedValueType emits the proper load operation.
indexedValues.push_back(
- IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
+ b.create<LoadOpTy>(loc, linalgOp.getOutputBuffer(i), indexing));
}
// TODO: When a region inliner exists, use it.
// 2. Inline region, currently only works for a single basic block.
// 3. Emit store.
- SmallVector<SmallVector<Value, 8>, 8> indexing;
- SmallVector<Value, 8> outputBuffers;
+ SmallVector<SmallVector<Value>, 8> indexing;
+ SmallVector<Value> outputBuffers;
for (unsigned i = 0; i < nOutputs; ++i) {
indexing.push_back(makeCanonicalAffineApplies(
b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
outputBuffers.push_back(linalgOp.getOutputBuffer(i));
}
- inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
- outputBuffers);
+ inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
+ indexing, outputBuffers);
}
// Create a padded view into the given `input` tensor using the 'indices'
// to access the tensor. `skipPadding` lists the dimensions for which no padding
// is needed e.g. the non-spatial dimensions for convolutions.
-template <typename IndexedValueType>
-Value getPaddedInput(Value input, ArrayRef<Value> indices,
- ArrayRef<int> skipPadding, Value padValue) {
- // TODO: add a level of indirection to linalg.generic.
-
- IndexedValueType indexedInput(input);
-
- auto *context = ScopedContext::getContext();
- Value zeroIndex = std_constant_index(0);
- SmallVector<Value, 8> conds;
- SmallVector<Value, 8> clampedImIdx;
+Value getPaddedInput(OpBuilder &b, Location loc, Value input,
+ ArrayRef<Value> indices, ArrayRef<int> skipPadding,
+ Value padValue) {
+ Value zeroIndex = b.create<ConstantIndexOp>(loc, 0);
+ SmallVector<Value> conds;
+ SmallVector<Value> clampedImIdx;
for (auto iter : llvm::enumerate(indices)) {
int idx = iter.index();
auto dim = iter.value();
@@ -193,29 +208,33 @@ Value getPaddedInput(Value input, ArrayRef<Value> indices,
continue;
}
- using edsc::op::sge;
- using edsc::op::slt;
- using edsc::op::operator||;
- Value leftOutOfBound = slt(dim, zeroIndex);
+ Value leftOutOfBound =
+ b.create<CmpIOp>(loc, CmpIPredicate::slt, dim, zeroIndex);
if (conds.empty())
conds.push_back(leftOutOfBound);
else
- conds.push_back(conds.back() || leftOutOfBound);
- Value rightBound = memref_dim(input, idx);
- conds.push_back(conds.back() || (sge(dim, rightBound)));
+ conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
+ Value rightBound = b.create<memref::DimOp>(loc, input, idx);
+ Value rightOutOfBound =
+ b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
+ conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));
// When padding is involved, the indices will only be shifted to negative,
// so having a max op is enough.
- auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
- {getAffineDimExpr(/*position=*/0, context),
- getAffineConstantExpr(0, context)},
- context);
- clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
+ MLIRContext *ctx = input.getContext();
+ AffineExpr m = getAffineDimExpr(/*position=*/0, ctx),
+ zero = getAffineConstantExpr(0, ctx);
+ AffineMap maxMap =
+ AffineMap::inferFromExprList(ArrayRef<ArrayRef<AffineExpr>>{{m, zero}})
+ .front();
+ clampedImIdx.push_back(b.create<AffineMaxOp>(loc, maxMap, ValueRange{dim}));
}
- Value readInput = indexedInput(clampedImIdx);
- return conds.empty() ? readInput
- : (Value)std_select(conds.back(), padValue, readInput);
+ Value readInput = b.create<memref::LoadOp>(loc, input, clampedImIdx);
+ if (conds.empty())
+ return readInput;
+
+ return b.create<SelectOp>(loc, conds.back(), padValue, readInput);
}
namespace {
@@ -229,48 +248,47 @@ template <typename OpType> Attribute getPadValueAttr(Type type) {
}
template <> Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
- auto &b = ScopedContext::getBuilderRef();
if (auto floatType = type.dyn_cast<FloatType>()) {
- return b.getFloatAttr(
- floatType,
- APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
+ return OpBuilder(type.getContext())
+ .getFloatAttr(floatType, APFloat::getInf(floatType.getFloatSemantics(),
+ /*Negative*/ true));
}
if (auto intType = type.dyn_cast<IntegerType>()) {
unsigned width = intType.getWidth();
// The select instruction used to lower the PoolingMin uses a signed
// comparison, use a signed constant irrespective of the signedness of the
// integer type.
- return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
+ return OpBuilder(type.getContext())
+ .getIntegerAttr(intType, APInt::getSignedMinValue(width));
}
llvm_unreachable("Unsupported data type for PoolingMaxOp");
return {};
}
template <> Attribute getPadValueAttr<PoolingMinOp>(Type type) {
- auto &b = ScopedContext::getBuilderRef();
if (auto floatType = type.dyn_cast<FloatType>()) {
- return b.getFloatAttr(floatType,
- APFloat::getInf(floatType.getFloatSemantics()));
+ return OpBuilder(type.getContext())
+ .getFloatAttr(floatType,
+ APFloat::getInf(floatType.getFloatSemantics()));
}
if (auto intType = type.dyn_cast<IntegerType>()) {
unsigned width = intType.getWidth();
// The select instruction used to lower the PoolingMin uses a signed
// comparison, use a signed constant irrespective of the signedness of the
// integer type.
- return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
+ return OpBuilder(type.getContext())
+ .getIntegerAttr(intType, APInt::getSignedMaxValue(width));
}
llvm_unreachable("Unsupported data type for PoolingMinOp");
return {};
}
template <> Attribute getPadValueAttr<PoolingSumOp>(Type type) {
- auto &b = ScopedContext::getBuilderRef();
- return b.getZeroAttr(type);
+ return OpBuilder(type.getContext()).getZeroAttr(type);
}
template <> Attribute getPadValueAttr<ConvOp>(Type type) {
- auto &b = ScopedContext::getBuilderRef();
- return b.getZeroAttr(type);
+ return OpBuilder(type.getContext()).getZeroAttr(type);
}
} // namespace
@@ -284,38 +302,43 @@ static bool hasPadding(ConvOp convOp) {
return false;
}
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs, ConvOp convOp) {
assert(convOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
- auto &b = ScopedContext::getBuilderRef();
- auto loc = ScopedContext::getLocation();
auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
- SmallVector<Value, 8> fIdx(
- makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
- SmallVector<Value, 8> imIdx(
- makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
- SmallVector<Value, 8> oIdx(
- makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
+ SmallVector<Value> fIdx(makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
+ SmallVector<Value> imIdx(makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
+ SmallVector<Value> oIdx(makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
- IndexedValueType F(convOp.filter()), O(convOp.output());
+ Value filter = convOp.filter(), output = convOp.output();
// Emit scalar form. Padded conv involves an affine.max in the memory access
// which is not allowed by affine.load. Override to use an MemRefIndexedValue
// when there is non-zero padding.
if (hasPadding(convOp)) {
Type type = convOp.input().getType().cast<MemRefType>().getElementType();
- Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
- Value paddedInput = getPaddedInput<MemRefIndexedValue>(
- convOp.input(), imIdx,
- /* Only need to pad the window dimensions */
- {0, static_cast<int>(imIdx.size()) - 1}, padValue);
- O(oIdx) += F(fIdx) * paddedInput;
+ Value padValue =
+ b.create<ConstantOp>(loc, type, getPadValueAttr<ConvOp>(type));
+ Value paddedInput =
+ getPaddedInput(b, loc, convOp.input(), imIdx,
+ /* Only need to pad the window dimensions */
+ {0, static_cast<int>(imIdx.size()) - 1}, padValue);
+ Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
+ Value mulVal = ArithBuilder(b, loc).mul(filterVal, paddedInput);
+ Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
+ Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
+ b.create<StoreOpTy>(loc, addVal, output, oIdx);
} else {
- IndexedValueType I(convOp.input());
- O(oIdx) += F(fIdx) * I(imIdx);
+ Value inputVal = b.create<LoadOpTy>(loc, convOp.input(), imIdx);
+ Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
+ Value mulVal = ArithBuilder(b, loc).mul(filterVal, inputVal);
+ Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
+ Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
+ b.create<StoreOpTy>(loc, addVal, output, oIdx);
}
}
@@ -327,55 +350,62 @@ template <typename PoolingOp> static bool hasPadding(PoolingOp poolingOp) {
return false;
}
-template <typename IndexedValueType, typename PoolingOp>
-static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
+template <typename LoadOpTy, typename StoreOpTy, typename PoolingOp>
+static Value getPoolingInput(OpBuilder &b, Location loc, PoolingOp op,
+ ArrayRef<Value> inputIndices) {
if (hasPadding(op)) {
Type type =
op.input().getType().template cast<MemRefType>().getElementType();
- Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
- return getPaddedInput<MemRefIndexedValue>(op.input(), inputIndices,
- /*Pad every dimension*/ {},
- padValue);
+ Value padValue =
+ b.create<ConstantOp>(loc, type, getPadValueAttr<PoolingOp>(type));
+ return getPaddedInput(b, loc, op.input(), inputIndices,
+ /*Pad every dimension*/ {}, padValue);
}
- IndexedValueType input(op.input());
- return input(inputIndices);
+ return b.create<LoadOpTy>(loc, op.input(), inputIndices);
}
-template <typename IndexedValueType, typename OpType>
-void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
- InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
- // Emit scalar form.
- IndexedValueType output(op.output());
- Value lhs = output(indices.outputs);
- Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
- using edsc::op::sgt;
- using edsc::op::slt;
- Value value = std::is_same<OpType, PoolingMinOp>()
- ? std_select(slt(lhs, rhs), lhs, rhs)
- : std_select(sgt(lhs, rhs), lhs, rhs);
- output(indices.outputs) = value;
+template <typename LoadOpTy, typename StoreOpTy, typename OpType>
+void emitPoolingMinMaxScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs, OpType op) {
+ InputAndOutputIndices indices = getInputAndOutputIndices(b, loc, allIvs, op);
+ Value lhs = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
+ Value rhs = getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
+ Value value = llvm::TypeSwitch<Operation *, Value>(op)
+ .Case([&](PoolingMinOp poolingOp) {
+ return ArithBuilder(b, loc).select(
+ ArithBuilder(b, loc).slt(lhs, rhs), lhs, rhs);
+ })
+ .Case([&](PoolingMaxOp poolingOp) {
+ return ArithBuilder(b, loc).select(
+ ArithBuilder(b, loc).sgt(lhs, rhs), lhs, rhs);
+ })
+ .Default([&](auto) { return Value(); });
+ b.create<StoreOpTy>(loc, value, op.output(), indices.outputs);
}
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
- emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
- op);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs, PoolingMaxOp op) {
+ emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMaxOp>(
+ b, loc, allIvs, op);
}
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
- emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
- op);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs, PoolingMinOp op) {
+ emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMinOp>(
+ b, loc, allIvs, op);
}
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
- auto indices = getInputAndOutputIndices(allIvs, op);
- IndexedValueType output(op.output());
-
- // Emit scalar form.
- output(indices.outputs) +=
- getPoolingInput<IndexedValueType>(op, indices.inputs);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+ ArrayRef<Value> allIvs, PoolingSumOp op) {
+ auto indices = getInputAndOutputIndices(b, loc, allIvs, op);
+ Value inputVal =
+ getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
+ Value outputVal = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
+ Value added = ArithBuilder(b, loc).add(outputVal, inputVal);
+ b.create<StoreOpTy>(loc, added, op.output(), indices.outputs);
}
/// Replace the index operations in the body of the loop nest by the matching
@@ -413,8 +443,12 @@ static void replaceIndexOpsByInductionVariables(LinalgOp linalgOp,
template <typename LoopTy>
static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
LinalgOp linalgOp) {
- using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
- ScopedContext scope(rewriter, linalgOp.getLoc());
+ using LoadOpTy =
+ typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
+ AffineLoadOp, memref::LoadOp>::type;
+ using StoreOpTy =
+ typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
+ AffineStoreOp, memref::StoreOp>::type;
// Canonicalize indexed_generic operations before lowering them to loops.
if (isa<IndexedGenericOp>(linalgOp))
@@ -428,16 +462,18 @@ static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());
- SmallVector<Value, 4> allIvs;
+ SmallVector<Value> allIvs;
GenerateLoopNest<LoopTy>::doit(
- loopRanges, linalgOp, iteratorTypes,
- [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
+ rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
+ [&](OpBuilder &b, Location loc, ValueRange ivs,
+ ValueRange iterArgs) -> scf::ValueVector {
assert(iterArgs.empty() && "unexpected iterArgs");
allIvs.append(ivs.begin(), ivs.end());
llvm::TypeSwitch<Operation *>(linalgOp)
.Case<ConvOp, PoolingMaxOp, PoolingMinOp, PoolingSumOp, LinalgOp>(
[&](auto op) {
- emitScalarImplementation<IndexedValueTy>(allIvs, op);
+ emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs,
+ op);
})
.Default([&](Operation *op) { assert(false && "unexpected op"); });
return scf::ValueVector{};
@@ -499,7 +535,7 @@ struct TiledLoopToSCFPattern : public OpRewritePattern<TiledLoopOp> {
tiledLoop.upperBound(), tiledLoop.step(),
[&](OpBuilder &builder, Location loc, ValueRange ivs) {
// Move body without its terminator.
- SmallVector<Value, 16> newBlockArgs;
+ SmallVector<Value> newBlockArgs;
newBlockArgs.append(ivs.begin(), ivs.end());
newBlockArgs.append(tiledLoop.inputs().begin(),
tiledLoop.inputs().end());
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index bdc1d7097ccd..9450d581edec 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -19,7 +19,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
@@ -225,69 +224,67 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
// 2. Create the tiled loops.
LinalgOp res = op;
SmallVector<Value, 4> ivs, tensorResults;
- GenerateLoopNest<LoopTy>::doit(
- loopRanges, op, iteratorTypes,
- [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
- auto &b = ScopedContext::getBuilderRef();
- auto loc = ScopedContext::getLocation();
- ivs.assign(localIvs.begin(), localIvs.end());
-
- // When an `interchangeVector` is present, it has been applied to the
- // loop ranges and the iterator types. Apply its inverse to the
- // resulting loop `ivs` to match the op definition.
- SmallVector<Value, 4> interchangedIvs;
- if (!options.interchangeVector.empty())
- interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
- else
- interchangedIvs.assign(ivs.begin(), ivs.end());
-
- assert(op.getNumOutputTensors() == iterArgs.size() &&
- "num output tensors must match number of loop iter arguments");
-
- auto operands = llvm::to_vector<4>(op.getInputs());
- SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
- // TODO: thanks to simplifying assumption we do not need to worry about
- // order of output buffers and tensors: there is only ever one kind.
- assert(outputBuffers.empty() || iterArgs.empty());
- operands.append(outputBuffers.begin(), outputBuffers.end());
- operands.append(iterArgs.begin(), iterArgs.end());
- auto sizeBounds =
- applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
- SmallVector<Value, 4> tiledOperands = makeTiledShapes(
- b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
- auto nonShapedOperands = op.getAssumedNonShapedOperands();
- tiledOperands.append(nonShapedOperands.begin(),
- nonShapedOperands.end());
-
- // TODO: use an interface/adaptor to avoid leaking position in
- // `tiledOperands`.
- SmallVector<Type, 4> resultTensorTypes;
- for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
- resultTensorTypes.push_back(
- tiledOperands[opOperand->getOperandNumber()].getType());
-
- res = op.clone(b, loc, resultTensorTypes, tiledOperands);
-
- // Insert a subtensor_insert for each output tensor.
- unsigned resultIdx = 0;
- for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
- // TODO: use an interface/adaptor to avoid leaking position in
- // `tiledOperands`.
- Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
- if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
- tensorResults.push_back(b.create<SubTensorInsertOp>(
- loc, subtensor.source().getType(), res->getResult(resultIdx),
- subtensor.source(), subtensor.offsets(), subtensor.sizes(),
- subtensor.strides(), subtensor.static_offsets(),
- subtensor.static_sizes(), subtensor.static_strides()));
- } else {
- tensorResults.push_back(res->getResult(resultIdx));
- }
- ++resultIdx;
- }
- return scf::ValueVector(tensorResults.begin(), tensorResults.end());
- },
- options.distribution);
+ auto tiledLoopBodyBuilder = [&](OpBuilder &b, Location loc,
+ ValueRange localIvs,
+ ValueRange iterArgs) -> scf::ValueVector {
+ ivs.assign(localIvs.begin(), localIvs.end());
+
+ // When an `interchangeVector` is present, it has been applied to the
+ // loop ranges and the iterator types. Apply its inverse to the
+ // resulting loop `ivs` to match the op definition.
+ SmallVector<Value, 4> interchangedIvs;
+ if (!options.interchangeVector.empty())
+ interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
+ else
+ interchangedIvs.assign(ivs.begin(), ivs.end());
+
+ assert(op.getNumOutputTensors() == iterArgs.size() &&
+ "num output tensors must match number of loop iter arguments");
+
+ auto operands = llvm::to_vector<4>(op.getInputs());
+ SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
+ // TODO: thanks to simplifying assumption we do not need to worry about
+ // order of output buffers and tensors: there is only ever one kind.
+ assert(outputBuffers.empty() || iterArgs.empty());
+ operands.append(outputBuffers.begin(), outputBuffers.end());
+ operands.append(iterArgs.begin(), iterArgs.end());
+ auto sizeBounds =
+ applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
+ SmallVector<Value, 4> tiledOperands = makeTiledShapes(
+ b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
+ auto nonShapedOperands = op.getAssumedNonShapedOperands();
+ tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end());
+
+ // TODO: use an interface/adaptor to avoid leaking position in
+ // `tiledOperands`.
+ SmallVector<Type, 4> resultTensorTypes;
+ for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
+ resultTensorTypes.push_back(
+ tiledOperands[opOperand->getOperandNumber()].getType());
+
+ res = op.clone(b, loc, resultTensorTypes, tiledOperands);
+
+ // Insert a subtensor_insert for each output tensor.
+ unsigned resultIdx = 0;
+ for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
+ // TODO: use an interface/adaptor to avoid leaking position in
+ // `tiledOperands`.
+ Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
+ if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
+ tensorResults.push_back(b.create<SubTensorInsertOp>(
+ loc, subtensor.source().getType(), res->getResult(resultIdx),
+ subtensor.source(), subtensor.offsets(), subtensor.sizes(),
+ subtensor.strides(), subtensor.static_offsets(),
+ subtensor.static_sizes(), subtensor.static_strides()));
+ } else {
+ tensorResults.push_back(res->getResult(resultIdx));
+ }
+ ++resultIdx;
+ }
+ return scf::ValueVector(tensorResults.begin(), tensorResults.end());
+ };
+ GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
+ tiledLoopBodyBuilder, options.distribution);
// 3. Transform IndexOp results w.r.t. the tiling.
transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 0bba27d931df..21c2240b4683 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -16,7 +16,6 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -197,15 +196,14 @@ IntegerAttr getSmallestBoundingIndex(Value size) {
/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
- ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+ OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+ function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+ ValueRange)>
+ bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> distributionOptions) {
auto iterArgInitValues = linalgOp.getOutputTensors();
// Create procInfo so it dominates loops, if appropriate.
- OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
- Location loc = edsc::ScopedContext::getLocation();
-
SmallVector<ProcInfo, 4> procInfo;
SmallVector<DistributionMethod, 0> distributionMethod;
if (distributionOptions.hasValue()) {
@@ -219,13 +217,13 @@ void GenerateLoopNest<scf::ForOp>::doit(
distributionMethod = distributionOptions->distributionMethod;
if (distributionMethod.size() < parallelLoopRanges.size())
parallelLoopRanges.resize(distributionMethod.size());
- procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
+ procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
}
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
- LoopNest loopNest =
- edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
+ LoopNest loopNest = mlir::scf::buildLoopNest(
+ b, loc, lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
if (!distributionOptions || loopNest.loops.empty())
return;
@@ -246,9 +244,11 @@ void GenerateLoopNest<scf::ForOp>::doit(
/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
- ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+ OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+ function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+ ValueRange)>
+ bodyBuilderFn,
Optional<LinalgLoopDistributionOptions>) {
auto iterArgInitValues = linalgOp.getOutputTensors();
assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
@@ -264,38 +264,36 @@ void GenerateLoopNest<AffineForOp>::doit(
constantSteps.push_back(op.getValue());
}
- auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
- bodyBuilderFn(ivs, {});
- };
- edsc::affineLoopNestBuilder(lbs, ubs, constantSteps,
- bodyBuilderWithoutIterArgsFn);
+ mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
+ [&](OpBuilder &b, Location loc, ValueRange ivs) {
+ bodyBuilderFn(b, loc, ivs, {});
+ });
}
/// Specialization to build an linalg.tiled_loop
template <>
void GenerateLoopNest<TiledLoopOp>::doit(
- ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+ OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+ function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+ ValueRange)>
+ bodyBuilderFn,
Optional<LinalgLoopDistributionOptions>) {
- OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
- Location loc = edsc::ScopedContext::getLocation();
SmallVector<ProcInfo, 2> procInfo;
-
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange ivs, ValueRange inputs,
ValueRange outputs) {
- ScopedContext context(nestedBuilder, nestedLoc);
- scf::ValueVector results = bodyBuilderFn(ivs, linalgOp.getOutputTensors());
+ scf::ValueVector results = bodyBuilderFn(nestedBuilder, nestedLoc, ivs,
+ linalgOp.getOutputTensors());
nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
};
- auto tiledLoop = builder.create<TiledLoopOp>(
+ auto tiledLoop = b.create<TiledLoopOp>(
loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
- builder.getArrayAttr(iteratorTypes), wrappedBuilderFn);
+ b.getArrayAttr(iteratorTypes), wrappedBuilderFn);
// Replace inputs/outputs with the corresponding region args.
auto isInsideTiledLoop = [&](OpOperand &operand) {
@@ -310,9 +308,9 @@ void GenerateLoopNest<TiledLoopOp>::doit(
}
/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
-void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
- Value procId, Value nprocs, Value &lb,
- Value &ub, Value &step) {
+void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
+ Value nprocs, Value &lb, Value &ub,
+ Value &step) {
using edsc::op::operator+;
using edsc::op::operator*;
lb = lb + (procId * step);
@@ -329,20 +327,22 @@ void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
// TODO: this function can be made iterative instead. However, it
// will have at most as many recursive calls as nested loops, which rarely
// exceeds 10.
-static void
-generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
- ArrayRef<Attribute> iteratorTypes,
- function_ref<void(ValueRange)> bodyBuilderFn,
- SmallVectorImpl<Value> &ivStorage,
- ArrayRef<DistributionMethod> distributionMethod = {}) {
+static void generateParallelLoopNest(
+ OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
+ ValueRange steps, ArrayRef<Attribute> iteratorTypes,
+ function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
+ SmallVectorImpl<Value> &ivStorage,
+ ArrayRef<DistributionMethod> distributionMethod = {}) {
assert(lbs.size() == ubs.size());
assert(lbs.size() == steps.size());
assert(lbs.size() == iteratorTypes.size());
// If there are no (more) loops to be generated, generate the body and be
// done with it.
- if (iteratorTypes.empty())
- return bodyBuilderFn(ivStorage);
+ if (iteratorTypes.empty()) {
+ bodyBuilderFn(b, loc, ivStorage);
+ return;
+ }
// Find the outermost parallel loops and drop their types from the list.
unsigned nLoops = iteratorTypes.size();
@@ -353,27 +353,29 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
// recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
// in this case.
if (nOuterPar == 0) {
- edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
- ivStorage.push_back(iv);
- generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
- steps.drop_front(), iteratorTypes.drop_front(),
- bodyBuilderFn, ivStorage, distributionMethod);
- });
+ LoopNest singleLoop = buildLoopNest(
+ b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
+ [&](OpBuilder &b, Location loc, ValueRange ivs) {
+ ivStorage.append(ivs.begin(), ivs.end());
+ generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
+ steps.drop_front(),
+ iteratorTypes.drop_front(), bodyBuilderFn,
+ ivStorage, distributionMethod);
+ });
return;
}
if (distributionMethod.empty()) {
// Generate a single parallel loop-nest operation for all outermost
// parallel loops and recurse.
- edsc::OperationBuilder<scf::ParallelOp>(
- lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
+ b.create<scf::ParallelOp>(
+ loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
steps.take_front(nOuterPar),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
- edsc::ScopedContext context(nestedBuilder, nestedLoc);
ivStorage.append(localIvs.begin(), localIvs.end());
generateParallelLoopNest(
- lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
- steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
- bodyBuilderFn, ivStorage,
+ nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
+ ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
+ iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
(distributionMethod.size() < nOuterPar)
? ArrayRef<DistributionMethod>()
: distributionMethod.drop_front(nOuterPar));
@@ -394,15 +396,14 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
case DistributionMethod::Cyclic: {
// Generate a single parallel loop-nest operation for all outermost
// parallel loops and recurse.
- edsc::OperationBuilder<scf::ParallelOp>(
- lbs.take_front(numProcessed), ubs.take_front(numProcessed),
+ b.create<scf::ParallelOp>(
+ loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
steps.take_front(numProcessed),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
- edsc::ScopedContext context(nestedBuilder, nestedLoc);
ivStorage.append(localIvs.begin(), localIvs.end());
generateParallelLoopNest(
- lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
- steps.drop_front(numProcessed),
+ nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
+ ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
(distributionMethod.size() < numProcessed)
? ArrayRef<DistributionMethod>()
@@ -418,12 +419,13 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
for (unsigned i = 1; i < numProcessed; ++i)
cond = cond && slt(lbs[i], ubs[i]);
ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
- edsc::conditionBuilder(cond, [&]() {
+ b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
generateParallelLoopNest(
- lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+ b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
steps.drop_front(numProcessed),
iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
distributionMethod.drop_front(numProcessed));
+ b.create<scf::YieldOp>(loc, ValueRange{});
});
return;
}
@@ -432,7 +434,7 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
// with inner loop generation.
ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
generateParallelLoopNest(
- lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+ b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
return;
@@ -442,9 +444,11 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
/// Specialization for generating a mix of parallel and sequential scf loops.
template <>
void GenerateLoopNest<scf::ParallelOp>::doit(
- ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+ OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+ function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+ ValueRange)>
+ bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> distributionOptions) {
auto iterArgInitValues = linalgOp.getOutputTensors();
assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
@@ -466,7 +470,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
SmallVector<DistributionMethod, 0> distributionMethod;
if (distributionOptions) {
auto &options = distributionOptions.getValue();
- OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
+ OpBuilder &b = edsc::ScopedContext::getBuilderRef();
Location loc = edsc::ScopedContext::getLocation();
distributionMethod.assign(distributionOptions->distributionMethod.begin(),
distributionOptions->distributionMethod.end());
@@ -478,14 +482,14 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
if (distributionMethod.size() < parallelLoopRanges.size())
parallelLoopRanges.resize(distributionMethod.size());
SmallVector<ProcInfo, 2> procInfo =
- options.procInfo(builder, loc, parallelLoopRanges);
+ options.procInfo(b, loc, parallelLoopRanges);
unsigned index = 0;
for (auto iteratorType : enumerate(iteratorTypes)) {
if (index >= procInfo.size())
break;
if (isParallelIteratorType(iteratorType.value())) {
unsigned i = iteratorType.index();
- updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
+ updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
procInfo[index].nprocs, lbsStorage[i],
ubsStorage[i], stepsStorage[i]);
index++;
@@ -493,17 +497,17 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
}
}
ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
- auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
- bodyBuilderFn(ivs, {});
- };
- generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
- bodyBuilderWithoutIterArgsFn, ivs,
- distributionMethod);
+ generateParallelLoopNest(
+ b, loc, lbs, ubs, steps, iteratorTypes,
+ [&](OpBuilder &b, Location loc, ValueRange ivs) {
+ bodyBuilderFn(b, loc, ivs, {});
+ },
+ ivs, distributionMethod);
assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
-SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
LinalgOp linalgOp,
ArrayRef<Value> tiledOperands,
ValueRange ivs, ValueRange tileSizes,
@@ -529,7 +533,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
}
- MLIRContext *context = builder.getContext();
+ MLIRContext *context = b.getContext();
SmallVector<Value, 4> tiledShapes;
tiledShapes.reserve(tiledOperands.size());
for (auto en : llvm::enumerate(tiledOperands)) {
@@ -555,10 +559,10 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
for (unsigned r = 0; r < rank; ++r) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
if (!isTiled(map.getSubMap({r}), tileSizes)) {
- offsets.push_back(builder.getIndexAttr(0));
+ offsets.push_back(b.getIndexAttr(0));
Value dim = memref_dim(shapedOp, r).value;
sizes.push_back(dim);
- strides.push_back(builder.getIndexAttr(1));
+ strides.push_back(b.getIndexAttr(1));
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
continue;
}
@@ -568,10 +572,9 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
// (i.e. the op does not subsample, stepping occurs in the loop).
auto m = map.getSubMap({r});
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n");
- auto offset = applyMapToValues(builder, loc, m, lbs).front();
+ auto offset = applyMapToValues(b, loc, m, lbs).front();
offsets.push_back(offset);
- auto closedIntSize =
- applyMapToValues(builder, loc, m, subShapeSizes).front();
+ auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
 // Resulting size needs to be made a half-open interval again.
auto size = closedIntSize + std_constant_index(1);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
@@ -589,27 +592,29 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
AffineExpr dim0, dim1, dim2;
bindDims(context, dim0, dim1, dim2);
// Compute min(size, dim - offset) to avoid out-of-bounds accesses.
- auto minMap = AffineMap::get(
- /*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
- Value d = memref_dim(shapedOp, r);
+ AffineMap minMap =
+ AffineMap::inferFromExprList(
+ ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+ .front();
+ Value d = b.create<memref::DimOp>(loc, shapedOp, r);
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
- size = affine_min(builder.getIndexType(), minMap, operands);
+ size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
}
sizes.push_back(size);
LLVM_DEBUG(llvm::dbgs()
<< "makeTiledShapes: new offset: " << offset << "\n");
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n");
- strides.push_back(builder.getIndexAttr(1));
+ strides.push_back(b.getIndexAttr(1));
}
if (shapedType.isa<MemRefType>())
- tiledShapes.push_back(builder.create<memref::SubViewOp>(
- loc, shapedOp, offsets, sizes, strides));
+ tiledShapes.push_back(
+ b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
else
tiledShapes.push_back(
- builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+ b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
}
return tiledShapes;
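The pattern running through the Utils.cpp hunks above is mechanical: every helper now receives OpBuilder &b and Location loc explicitly instead of reading them from edsc::ScopedContext, and loops are built with the plain builder entry points (mlir::buildAffineLoopNest, scf::buildLoopNest, b.create<scf::ParallelOp>, b.create<scf::IfOp>). A minimal sketch of the post-patch idiom, assuming the caller already holds the bound and step Values; the function name and the copy body are illustrative only, not part of this change:

    #include "mlir/Dialect/MemRef/IR/MemRef.h"
    #include "mlir/Dialect/SCF/SCF.h"

    using namespace mlir;

    // Build an scf.for nest over [lbs, ubs) and copy src into dst element-wise.
    // The builder and location are threaded explicitly; no ScopedContext is set up.
    static void emitCopyLoops(OpBuilder &b, Location loc, Value src, Value dst,
                              ValueRange lbs, ValueRange ubs, ValueRange steps) {
      scf::buildLoopNest(
          b, loc, lbs, ubs, steps,
          [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
            Value v = nested.create<memref::LoadOp>(nestedLoc, src, ivs);
            nested.create<memref::StoreOp>(nestedLoc, v, dst, ivs);
          });
    }

The callback receives the nested builder and location, which is exactly how the rewritten GenerateLoopNest specializations above forward them to bodyBuilderFn.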
diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt
index e3e8f96a62e6..ebab4a1b6db1 100644
--- a/mlir/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/CMakeLists.txt
@@ -1,6 +1,5 @@
add_mlir_dialect_library(MLIRSCF
SCF.cpp
- EDSC/Builders.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/LoopOps
@@ -9,7 +8,6 @@ add_mlir_dialect_library(MLIRSCF
MLIRSCFOpsIncGen
LINK_LIBS PUBLIC
- MLIREDSC
MLIRIR
MLIRLoopLikeInterface
MLIRMemRef
diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
deleted file mode 100644
index d0ac5f0c3439..000000000000
--- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
-
-using namespace mlir;
-using namespace mlir::edsc;
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
- function_ref<void(ValueRange)> fun) {
- // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
- // the expected function interface.
- assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
- return mlir::scf::buildLoopNest(
- ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
- steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
- ScopedContext context(builder, loc);
- if (fun)
- fun(ivs);
- });
-}
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
- function_ref<void(Value)> fun) {
- // Delegates to the ValueRange-based version by wrapping the lambda.
- auto wrapper = [&](ValueRange ivs) {
- assert(ivs.size() == 1);
- if (fun)
- fun(ivs[0]);
- };
- return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
- wrapper);
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
- Value lb, Value ub, Value step, ValueRange iterArgInitValues,
- function_ref<scf::ValueVector(Value, ValueRange)> fun) {
- // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
- // the expected function interface.
- assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
- return mlir::scf::buildLoopNest(
- ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
- step, iterArgInitValues,
- [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
- assert(ivs.size() == 1 && "expected one induction variable");
- ScopedContext context(builder, loc);
- if (fun)
- return fun(ivs[0], args);
- return scf::ValueVector(iterArgInitValues.begin(),
- iterArgInitValues.end());
- });
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
- ValueRange lbs, ValueRange ubs, ValueRange steps,
- ValueRange iterArgInitValues,
- function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
- // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
- // the expected function interface.
- assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
- return mlir::scf::buildLoopNest(
- ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
- steps, iterArgInitValues,
- [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
- ScopedContext context(builder, loc);
- if (fun)
- return fun(ivs, args);
- return scf::ValueVector(iterArgInitValues.begin(),
- iterArgInitValues.end());
- });
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapIfBody(function_ref<scf::ValueVector()> body, TypeRange expectedTypes) {
- (void)expectedTypes;
- return [=](OpBuilder &builder, Location loc) {
- ScopedContext context(builder, loc);
- scf::ValueVector returned = body();
- assert(ValueRange(returned).getTypes() == expectedTypes &&
- "'if' body builder returned values of unexpected type");
- builder.create<scf::YieldOp>(loc, returned);
- };
-}
-
-ValueRange
-mlir::edsc::conditionBuilder(TypeRange results, Value condition,
- function_ref<scf::ValueVector()> thenBody,
- function_ref<scf::ValueVector()> elseBody,
- scf::IfOp *ifOp) {
- assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
- assert(thenBody && "thenBody is mandatory");
-
- auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
- ScopedContext::getLocation(), results, condition,
- wrapIfBody(thenBody, results), wrapIfBody(elseBody, results));
- if (ifOp)
- *ifOp = newOp;
- return newOp.getResults();
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapZeroResultIfBody(function_ref<void()> body) {
- return [=](OpBuilder &builder, Location loc) {
- ScopedContext context(builder, loc);
- body();
- builder.create<scf::YieldOp>(loc);
- };
-}
-
-ValueRange mlir::edsc::conditionBuilder(Value condition,
- function_ref<void()> thenBody,
- function_ref<void()> elseBody,
- scf::IfOp *ifOp) {
- assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
- assert(thenBody && "thenBody is mandatory");
-
- auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
- ScopedContext::getLocation(), condition, wrapZeroResultIfBody(thenBody),
- elseBody ? llvm::function_ref<void(OpBuilder &, Location)>(
- wrapZeroResultIfBody(elseBody))
- : llvm::function_ref<void(OpBuilder &, Location)>(nullptr));
- if (ifOp)
- *ifOp = newOp;
- return {};
-}
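With edsc::conditionBuilder removed, the replacement used in Utils.cpp above is to create scf::IfOp directly and terminate the then-region with an explicit scf.yield. A small sketch of the zero-result, then-only form, using the same headers as the previous sketch; the helper name and the guarded store are illustrative, not part of the patch:

    // Emit: scf.if %cond { memref.store %value, %memref[%indices] }
    static void emitGuardedStore(OpBuilder &b, Location loc, Value cond,
                                 Value value, Value memref, ValueRange indices) {
      b.create<scf::IfOp>(loc, cond, [&](OpBuilder &thenB, Location thenLoc) {
        thenB.create<memref::StoreOp>(thenLoc, value, memref, indices);
        // The EDSC wrapper used to append the terminator; with a plain builder
        // the region is terminated explicitly, as in the Utils.cpp hunk above.
        thenB.create<scf::YieldOp>(thenLoc, ValueRange{});
      });
    }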
diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir
index 4de404db7afd..59c72f657e1d 100644
--- a/mlir/test/Dialect/Linalg/affine.mlir
+++ b/mlir/test/Dialect/Linalg/affine.mlir
@@ -24,18 +24,18 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[N:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
-// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK-DAG: %[[a:.*]] = affine.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECK-DAG: %[[b:.*]] = affine.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: %[[A:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK-DAG: %[[a:.*]] = affine.load %[[A]]{{.*}} : memref<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = affine.load %[[B]]{{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECK-DAG: %[[c:.*]] = affine.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[c:.*]] = affine.load %[[C]]{{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECK: affine.store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: affine.store %[[res]], %[[C]]{{.*}} : memref<?x?xf32>
func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
@@ -49,12 +49,12 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]{{.*}}
// No padding needed here; only affine loads.
// CHECK-NEXT: affine.load
// CHECK-NEXT: affine.load
@@ -78,26 +78,26 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
-// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[X1]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: affine.for {{.*}}0 to %[[Z1]] {
+// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}
+// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}
// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
// Padded conv involves an affine.max in the memory access and this is not
// allowed by affine.load. Use memref.load in such cases.
-// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: select {{.*}} : f32
+// CHECK: affine.load
+// CHECK: mulf {{.*}} : f32
+// CHECK: affine.load
+// CHECK: addf {{.*}} : f32
+// CHECK: affine.store
//----------------------------------------------------------------------------//
// Named ops to loops.
@@ -115,10 +115,10 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
-// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] {
-// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] {
-// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] {
-// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] {
+// CHECK: affine.for %[[b:.*]] = {{.*}}0 to %[[B]] {
+// CHECK: affine.for %[[m:.*]] = {{.*}}0 to %[[M]] {
+// CHECK: affine.for %[[n:.*]] = {{.*}}0 to %[[N]] {
+// CHECK: affine.for %[[k:.*]] = {{.*}}0 to %[[K]] {
// CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
// CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
// CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index b958e0acc300..c469160d6e86 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck --check-prefix=CHECKLOOP %s
+// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s
// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s
// Test that we can lower all the way to LLVM without crashing; don't check results here.
// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -o=/dev/null 2>&1
-// CHECKLOOP-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
-// CHECKLOOP-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
-// CHECKLOOP-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
-// CHECKLOOP-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
-// CHECKLOOP-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>
+// CHECK-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
+// CHECK-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+// CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
+// CHECK-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
+// CHECK-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>
-// CHECKLOOP-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
-// CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
-// CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)>
-// CHECKLOOP-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)>
-// CHECKLOOP-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)>
+// CHECK-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
+// CHECK-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
+// CHECK-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)>
+// CHECK-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)>
+// CHECK-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)>
// CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECKPARALLEL-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
@@ -30,7 +30,6 @@
// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)>
// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)>
-
func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
@@ -41,22 +40,22 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
outs(%C: memref<?x?xf32>)
return
}
-// CHECKLOOP-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
-// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index
-// CHECKLOOP-SAME: [[N:arg[0-9]+]]: index
-// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index
-// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
+// CHECK-SAME: [[M:arg[0-9]+]]: index
+// CHECK-SAME: [[N:arg[0-9]+]]: index
+// CHECK-SAME: [[K:arg[0-9]+]]: index
+// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[M]]
+// CHECK: scf.for {{.*}} to %[[N]]
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index
@@ -65,8 +64,8 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} {
+// CHECKPARALLEL: scf.for {{.*}} to %[[K]]
// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -86,20 +85,20 @@ func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
outs(%4 : memref<?xf32>)
return
}
-// CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
-// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index
-// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index
-// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
-// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
-// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32>
-// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32>
+// CHECK-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
+// CHECK-SAME: [[M:arg[0-9]+]]: index
+// CHECK-SAME: [[K:arg[0-9]+]]: index
+// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
+// CHECK: scf.for {{.*}} to %[[M]]
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
+// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32>
+// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32>
// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index
@@ -108,7 +107,7 @@ func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.for {{.*}} to %[[K]]
// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -127,25 +126,25 @@ func @dot(%arg0: memref<?xi8>, %M: index) {
outs(%3 : memref<f32>)
return
}
-// CHECKLOOP-LABEL: func @dot(%{{.*}}: memref<?xi8>,
-// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index
-// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
-// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
-// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32>
-// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
-// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32>
-// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[C]][] : memref<f32>
+// CHECK-LABEL: func @dot(%{{.*}}: memref<?xi8>,
+// CHECK-SAME: [[K:arg[0-9]+]]: index
+// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32>
+// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
+// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32>
+// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[C]][] : memref<f32>
// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref<?xi8>,
// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index
// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.for {{.*}} to %[[K]]
// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32>
// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -160,21 +159,21 @@ func @dot_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
outs(%arg2: memref<f32>)
return
}
-// CHECKLOOP-LABEL: func @dot_view(
-// CHECKLOOP: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<f32>) {
-// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, #[[$strided1D]]>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
-// CHECKLOOP-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
-// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32>
-// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %{{.*}}[] : memref<f32>
+// CHECK-LABEL: func @dot_view(
+// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<f32>) {
+// CHECK: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, #[[$strided1D]]>
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
+// CHECK-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
+// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32>
+// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: store %[[res]], %{{.*}}[] : memref<f32>
// CHECKPARALLEL-LABEL: func @dot_view(
// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<f32>) {
// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, #[[$strided1D]]>
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.for {{.*}} to %[[K]]
// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -186,10 +185,10 @@ func @fill_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: f32) {
linalg.fill(%arg0, %arg1) : memref<?xf32, offset: ?, strides: [1]>, f32
return
}
-// CHECKLOOP-LABEL: func @fill_view(
-// CHECKLOOP: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
+// CHECK-LABEL: func @fill_view(
+// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) {
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
// CHECKPARALLEL-LABEL: func @fill_view(
// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) {
@@ -200,8 +199,8 @@ func @fill_view0(%arg0: memref<f32>, %arg1: f32) {
linalg.fill(%arg0, %arg1) : memref<f32>, f32
return
}
-// CHECKLOOP-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
-// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref<f32>
+// CHECK-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
+// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32>
// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref<f32>
@@ -210,27 +209,27 @@ func @fill_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
linalg.fill(%arg0, %arg1) : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, f32
return
}
-// CHECKLOOP-LABEL: func @fill_view3(
-// CHECKLOOP: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK-LABEL: func @fill_view3(
+// CHECK: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) {
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL-LABEL: func @fill_view3(
// CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) {
// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
func @copy_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>) {
linalg.copy(%arg0, %arg1) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>
return
}
-// CHECKLOOP-LABEL: func @copy_view(
-// CHECKLOOP: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>) {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
-// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
+// CHECK-LABEL: func @copy_view(
+// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>) {
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
+// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]>
// CHECKPARALLEL-LABEL: func @copy_view(
// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>) {
@@ -242,12 +241,12 @@ func @copy_view0(%arg0: memref<f32>, %arg1: memref<f32>) {
linalg.copy(%arg0, %arg1) : memref<f32>, memref<f32>
return
}
-// CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) {
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[] : memref<f32>
-// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref<f32>
+// CHECK-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) {
+// CHECK: memref.load %{{.*}}[] : memref<f32>
+// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32>
// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) {
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[] : memref<f32>
+// CHECKPARALLEL: memref.load %{{.*}}[] : memref<f32>
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref<f32>
func @copy_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
@@ -256,43 +255,43 @@ func @copy_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
-// CHECKLOOP-LABEL: func @copy_view3
-// CHECKLOOP: (%{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK-LABEL: func @copy_view3
+// CHECK: (%{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: scf.for {{.*}} to %{{.*}}
+// CHECK: %[[L:.*]] = memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: store %[[L]], {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL-LABEL: func @copy_view3
// CHECKPARALLEL: (%{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: %[[L:.*]] = memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: store %[[L]], {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
return
}
-// CHECKLOOP-LABEL: func @conv_view3(
-// CHECKLOOP: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
-// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK-LABEL: func @conv_view3(
+// CHECK: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
+// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[Q:.*]] = memref.dim %arg0, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: scf.for {{.*}} to %[[B]]
+// CHECK: scf.for {{.*}} to %[[X0]]
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK: scf.for {{.*}} to %[[Q]]
+// CHECK: scf.for {{.*}} to %[[Z0]]
+// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: mulf
+// CHECK: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: addf
+// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL-LABEL: func @conv_view3(
// CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) {
@@ -302,44 +301,44 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: scf.for {{.*}} to %[[Q]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]]
+// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: mulf
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: addf
+// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]>
func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
return
}
-// CHECKLOOP-LABEL: func @conv_view4(
-// CHECKLOOP: %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>) {
-// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK-LABEL: func @conv_view4(
+// CHECK: %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>) {
+// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: scf.for {{.*}} to %[[B]]
+// CHECK: scf.for {{.*}} to %[[X0]]
+// CHECK: scf.for {{.*}} to %[[X1]]
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK: scf.for {{.*}} to %[[Q]]
+// CHECK: scf.for {{.*}} to %[[Z0]]
+// CHECK: scf.for {{.*}} to %[[Z1]]
+// CHECK: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]]
+// CHECK: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]]
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: mulf
+// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECK: addf
+// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
// CHECKPARALLEL-LABEL: func @conv_view4(
// CHECKPARALLEL: %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>) {
@@ -351,17 +350,17 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: scf.for {{.*}} to %[[Q]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]]
+// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]]
+// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]]
+// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: mulf
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: addf
+// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]>
func @conv_padding(%arg0: memref<?x?x?x?xf32>,
%arg1: memref<?x?x?x?xf32>,
@@ -372,34 +371,34 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @conv_padding
-// CHECKLOOP: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) {
-// CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32
-// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK-LABEL: func @conv_padding
+// CHECK: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) {
+// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32>
+// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32>
+// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32>
+// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32>
+// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
+// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
+// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
+// CHECK: scf.for {{.*}} to %[[B]]
+// CHECK: scf.for {{.*}} to %[[X0]]
+// CHECK: scf.for {{.*}} to %[[X1]]
+// CHECK: scf.for {{.*}} to %[[K]]
+// CHECK: scf.for {{.*}} to %[[Q]]
+// CHECK: scf.for {{.*}} to %[[Z0]]
+// CHECK: scf.for {{.*}} to %[[Z1]]
+// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}
+// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: select %{{.*}},
+// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32>
+// CHECK: mulf
+// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32>
+// CHECK: addf
+// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32>
// CHECKPARALLEL-LABEL: func @conv_padding
// CHECKPARALLEL: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) {
@@ -412,20 +411,20 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.for {{.*}} to %[[Q]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]]
+// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}
+// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: select %{{.*}},
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32>
+// CHECKPARALLEL: mulf
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32>
+// CHECKPARALLEL: addf
+// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32>
func @pooling_max(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -434,36 +433,36 @@ func @pooling_max(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_max
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_max
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECK: memref.load {{.*}} : memref<?x?xf32>
+// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
+// CHECK: %[[RES:.*]] = select %{{.*}},
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_max
// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?xf32>
+// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}},
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_max_padding(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -472,26 +471,26 @@ func @pooling_max_padding(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_max_padding
-// CHECKLOOP: %[[PAD:.*]] = constant 0xFF800000 : f32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKLOOP: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_max_padding
+// CHECK: %[[PAD:.*]] = constant 0xFF800000 : f32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
+// CHECK: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_max_padding
// CHECKPARALLEL: %[[PAD:.*]] = constant 0xFF800000 : f32
@@ -499,19 +498,19 @@ func @pooling_max_padding(%arg0: memref<?x?xf32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
// CHECKPARALLEL: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_max_padding_i32(%arg0: memref<?x?xi32>,
%arg1: memref<?x?xi32>,
@@ -520,26 +519,26 @@ func @pooling_max_padding_i32(%arg0: memref<?x?xi32>,
memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32>
return
}
-// CHECKLOOP-LABEL: func @pooling_max_padding_i32
-// CHECKLOOP: %[[PAD:.*]] = constant -2147483648 : i32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKLOOP: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECK-LABEL: func @pooling_max_padding_i32
+// CHECK: %[[PAD:.*]] = constant -2147483648 : i32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
+// CHECK: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32>
// CHECKPARALLEL-LABEL: func @pooling_max_padding_i32
// CHECKPARALLEL: %[[PAD:.*]] = constant -2147483648 : i32
@@ -547,19 +546,19 @@ func @pooling_max_padding_i32(%arg0: memref<?x?xi32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
// CHECKPARALLEL: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32>
func @pooling_min(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -568,36 +567,36 @@ func @pooling_min(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_min
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_min
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECK: memref.load {{.*}} : memref<?x?xf32>
+// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
+// CHECK: %[[RES:.*]] = select %{{.*}},
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_min
// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECKPARALLEL: memref.load {{.*}} : memref<?x?xf32>
+// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}},
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_min_padding(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -606,26 +605,26 @@ func @pooling_min_padding(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_min_padding
-// CHECKLOOP: %[[PAD:.*]] = constant 0x7F800000 : f32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKLOOP: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_min_padding
+// CHECK: %[[PAD:.*]] = constant 0x7F800000 : f32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
+// CHECK: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_min_padding
// CHECKPARALLEL: %[[PAD:.*]] = constant 0x7F800000 : f32
@@ -633,19 +632,19 @@ func @pooling_min_padding(%arg0: memref<?x?xf32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
// CHECKPARALLEL: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_min_padding_i32(%arg0: memref<?x?xi32>,
%arg1: memref<?x?xi32>,
@@ -654,26 +653,26 @@ func @pooling_min_padding_i32(%arg0: memref<?x?xi32>,
memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32>
return
}
-// CHECKLOOP-LABEL: func @pooling_min_padding_i32
-// CHECKLOOP: %[[PAD:.*]] = constant 2147483647 : i32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKLOOP: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECK-LABEL: func @pooling_min_padding_i32
+// CHECK: %[[PAD:.*]] = constant 2147483647 : i32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
+// CHECK: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32>
// CHECKPARALLEL-LABEL: func @pooling_min_padding_i32
// CHECKPARALLEL: %[[PAD:.*]] = constant 2147483647 : i32
@@ -681,19 +680,19 @@ func @pooling_min_padding_i32(%arg0: memref<?x?xi32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
// CHECKPARALLEL: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32>
func @pooling_sum(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -702,36 +701,36 @@ func @pooling_sum(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_sum
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_sum
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECK: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
+// CHECK: %[[LHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
+// CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_sum
// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: %[[LHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_sum_padding(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
@@ -740,25 +739,25 @@ func @pooling_sum_padding(%arg0: memref<?x?xf32>,
memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32>
return
}
-// CHECKLOOP-LABEL: func @pooling_sum_padding
-// CHECKLOOP: %[[PAD:.*]] = constant 0.000000e+00 : f32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECKLOOP: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-LABEL: func @pooling_sum_padding
+// CHECK: %[[PAD:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
+// CHECK: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32>
// CHECKPARALLEL-LABEL: func @pooling_sum_padding
// CHECKPARALLEL: %[[PAD:.*]] = constant 0.000000e+00 : f32
@@ -766,18 +765,18 @@ func @pooling_sum_padding(%arg0: memref<?x?xf32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32>
// CHECKPARALLEL: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32>
func @pooling_sum_padding_i32(%arg0: memref<?x?xi32>,
%arg1: memref<?x?xi32>,
@@ -786,25 +785,25 @@ func @pooling_sum_padding_i32(%arg0: memref<?x?xi32>,
memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32>
return
}
-// CHECKLOOP-LABEL: func @pooling_sum_padding_i32
-// CHECKLOOP: %[[PAD:.*]] = constant 0 : i32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
-// CHECKLOOP: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECK-LABEL: func @pooling_sum_padding_i32
+// CHECK: %[[PAD:.*]] = constant 0 : i32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32>
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
+// CHECK: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
+// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32>
// CHECKPARALLEL-LABEL: func @pooling_sum_padding_i32
// CHECKPARALLEL: %[[PAD:.*]] = constant 0 : i32
@@ -812,18 +811,18 @@ func @pooling_sum_padding_i32(%arg0: memref<?x?xi32>,
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32>
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32>
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32>
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32>
// CHECKPARALLEL: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xi32>
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32>
#accesses = [
affine_map<(i, j, k) -> (i, j)>,
@@ -850,17 +849,17 @@ func @generic_region(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1:
}
return
}
-// CHECKLOOP-LABEL: @generic_region
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[$strided2D]]>
-// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32
-// CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKLOOP: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK-LABEL: @generic_region
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: scf.for %[[k:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[$strided2D]]>
+// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32
+// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL-LABEL: @generic_region
// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
@@ -905,21 +904,21 @@ func @generic_index_region(
return
}
-// CHECKLOOP-LABEL: @generic_index_region
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
-// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
-// CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
-// CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index
-// CHECKLOOP: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32
-// CHECKLOOP: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32
-// CHECKLOOP: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32
-// CHECKLOOP: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
-// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
+// CHECK-LABEL: @generic_index_region
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: scf.for %[[k:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
+// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
+// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
+// CHECK: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index
+// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32
+// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32
+// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32
+// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
+// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
// CHECKPARALLEL-LABEL: @generic_index_region
// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
@@ -961,13 +960,13 @@ func @generic_op_zero_rank(%arg0: memref<f32>, %arg1: memref<3x4xf32>)
return
}
-// CHECKLOOP-LABEL: @generic_op_zero_rank
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][]
-// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
+// CHECK-LABEL: @generic_op_zero_rank
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][]
+// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
// CHECKPARALLEL-LABEL: @generic_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
@@ -992,16 +991,16 @@ func @generic_index_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>)
return
}
-// CHECKLOOP-LABEL: @generic_index_op_zero_rank
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][
-// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
-// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
-// CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
-// CHECKLOOP: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
+// CHECK-LABEL: @generic_index_op_zero_rank
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][
+// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECK: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
+// CHECK: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
+// CHECK: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
// CHECKPARALLEL-LABEL: @generic_index_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
@@ -1037,14 +1036,14 @@ func @generic_op_1D_reduce(%arg0: memref<?xf32>, %arg1: memref<f32>)
}
return
}
-// CHECKLOOP-LABEL: @generic_op_1D_reduce
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][]
-// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32
-// CHECKLOOP: store %[[c]], %[[ARG1]][]
+// CHECK-LABEL: @generic_op_1D_reduce
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
+// CHECK: %[[b:.*]] = memref.load %[[ARG1]][]
+// CHECK: %[[c:.*]] = addf %[[a]], %[[b]] : f32
+// CHECK: store %[[c]], %[[ARG1]][]
// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
@@ -1087,17 +1086,17 @@ func @generic_index_op_1D_reduce(%arg0: memref<?xf32>,
}
return
}
-// CHECKLOOP-LABEL: @generic_index_op_1D_reduce
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][]
-// CHECKLOOP: %[[c:.*]] = memref.load %[[ARG2]][]
-// CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
-// CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]]
-// CHECKLOOP: store %[[e]], %[[ARG2]][]
+// CHECK-LABEL: @generic_index_op_1D_reduce
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
+// CHECK: %[[b:.*]] = memref.load %[[ARG1]][]
+// CHECK: %[[c:.*]] = memref.load %[[ARG2]][]
+// CHECK: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
+// CHECK: %[[e:.*]] = addf %[[a]], %[[d]]
+// CHECK: store %[[e]], %[[ARG2]][]
// CHECKPARALLEL-LABEL: @generic_index_op_1D_reduce
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
@@ -1126,11 +1125,11 @@ func @generic_const_init(%arg0: memref<?xf32>) {
}
return
}
-// CHECKLOOP-LABEL: @generic_const_init
-// CHECKLOOP-SAME: %[[ARG0:.*]]: memref<?xf32>
-// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: store %[[CONST]], %[[ARG0]]
+// CHECK-LABEL: @generic_const_init
+// CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
+// CHECK: %[[CONST:.*]] = constant 1.000000e+00 : f32
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: store %[[CONST]], %[[ARG0]]
// CHECKPARALLEL-LABEL: @generic_const_init
// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32>
@@ -1165,18 +1164,18 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>,
}
return
}
-// CHECKLOOP-LABEL: @scalar_code
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECKLOOP-NOT: scf.for
-// CHECKLOOP: memref.load %[[ARG0]][]
-// CHECKLOOP: memref.load %[[ARG1]][]
-// CHECKLOOP: scf.if
-// CHECKLOOP: scf.yield
-// CHECKLOOP: else
-// CHECKLOOP: scf.yield
-// CHECKLOOP: store %{{.*}}, %[[ARG2]][]
+// CHECK-LABEL: @scalar_code
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECK-NOT: scf.for
+// CHECK: memref.load %[[ARG0]][]
+// CHECK: memref.load %[[ARG1]][]
+// CHECK: scf.if
+// CHECK: scf.yield
+// CHECK: else
+// CHECK: scf.yield
+// CHECK: store %{{.*}}, %[[ARG2]][]
// CHECKPARALLEL-LABEL: @scalar_code
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
@@ -1199,24 +1198,24 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
outs(%C : memref<?x?x?xf32>)
return
}
-// CHECKLOOP-LABEL: @named_batch_matmul
-// CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
-// CHECKLOOP: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
-// CHECKLOOP: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
-// CHECKLOOP: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
-// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
+// CHECK-LABEL: @named_batch_matmul
+// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
+// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
+// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
+// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
+// CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
+// CHECK: scf.for %[[m:.*]] = %{{.*}} to %[[M]]
+// CHECK: scf.for %[[n:.*]] = %{{.*}} to %[[N]]
+// CHECK: scf.for %[[k:.*]] = %{{.*}} to %[[K]]
+// CHECK: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
+// CHECK: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
+// CHECK: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
// CHECKPARALLEL-LABEL: @named_batch_matmul
// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
@@ -1227,7 +1226,7 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
// CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
// CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
-// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]]
// CHECKPARALLEL: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
@@ -1242,23 +1241,23 @@ func @conv1d_no_symbols(%in : memref<?xf32>, %filter : memref<?xf32>, %out : mem
return
}
-// CHECKLOOP-LABEL: @conv1d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
-// CHECKLOOP: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>
+// CHECK-LABEL: @conv1d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
+// CHECK: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>
// CHECKPARALLEL-LABEL: @conv1d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
@@ -1284,30 +1283,30 @@ func @conv2d_no_symbols(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out :
outs(%out: memref<?x?xf32>)
return
}
-// CHECKLOOP-LABEL: @conv2d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
-// CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
-// CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>
-
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
-
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
+// CHECK-LABEL: @conv2d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
+// CHECK: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
+// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
+// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
+// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
+// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
+// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>
+
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
+
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
// CHECKPARALLEL-LABEL: @conv2d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
@@ -1338,36 +1337,36 @@ func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %o
return
}
-// CHECKLOOP-LABEL: @conv3d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
-// CHECKLOOP: %[[c2:.*]] = constant 2 : index
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
-// CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
-// CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
-// CHECKLOOP: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>
-
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
-
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
+// CHECK-LABEL: @conv3d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK: %[[c2:.*]] = constant 2 : index
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
+// CHECK: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
+// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
+// CHECK: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
+// CHECK: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
+// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
+// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
+// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
+// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
+// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
+// CHECK: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>
+
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
+
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
// CHECKPARALLEL-LABEL: @conv3d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
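Note on the prefix rename visible above: switching CHECKLOOP to CHECK in loops.mlir means the sequential-lowering RUN line can rely on FileCheck's default prefix, while the parallel lowering keeps its explicit CHECKPARALLEL prefix. A minimal sketch of what the updated RUN lines would look like follows; the exact pass pipeline and flags used in the actual test file are not shown in this diff and are assumptions here.

// Illustrative sketch only -- the real RUN lines in loops.mlir may differ.
// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s
// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck %s --check-prefix=CHECKPARALLEL

With the default prefix, every // CHECK: line in the file is matched by the first invocation, which is why the CHECKLOOP-prefixed directives could be renamed without changing what the test verifies.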