[Mlir-commits] [mlir] e708471 - [mlir][NFC] Cleanup AffineOps directory structure
River Riddle
llvmlistbot at llvm.org
Fri Mar 20 14:24:01 PDT 2020
Author: Rob Suderman
Date: 2020-03-20T14:23:43-07:00
New Revision: e708471395b685f3edec2e8c1ab320358640ae74
URL: https://github.com/llvm/llvm-project/commit/e708471395b685f3edec2e8c1ab320358640ae74
DIFF: https://github.com/llvm/llvm-project/commit/e708471395b685f3edec2e8c1ab320358640ae74.diff
LOG: [mlir][NFC] Cleanup AffineOps directory structure
Summary:
Change the AffineOps dialect structure to better group both IR and Transforms. This includes extracting transforms directly related to AffineOps. Also moves AffineOps to Affine.
Differential Revision: https://reviews.llvm.org/D76161
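For downstream users the change is mechanical: include paths move under Dialect/Affine, and the dialect class is renamed from AffineOpsDialect to AffineDialect. A minimal sketch of an updated conversion target, mirroring the Toy changes below (the free function is illustrative only, not part of this commit):

```c++
#include "mlir/Dialect/Affine/IR/AffineOps.h" // was: mlir/Dialect/AffineOps/AffineOps.h
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Transforms/DialectConversion.h"

// Only the C++ class name changes (AffineOpsDialect -> AffineDialect); the
// dialect mnemonic "affine" and its op names are untouched, so existing IR
// round-trips unchanged.
static void markAffineLegal(mlir::ConversionTarget &target) {
  target.addLegalDialect<mlir::AffineDialect, mlir::StandardOpsDialect>();
}
```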
Added:
mlir/include/mlir/Dialect/Affine/CMakeLists.txt
mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td
mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
mlir/include/mlir/Dialect/Affine/IR/CMakeLists.txt
mlir/include/mlir/Dialect/Affine/Passes.h
mlir/include/mlir/Dialect/VectorOps/VectorOps.td
mlir/lib/Dialect/Affine/CMakeLists.txt
mlir/lib/Dialect/Affine/EDSC/Builders.cpp
mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt
mlir/lib/Dialect/Affine/IR/AffineOps.cpp
mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
mlir/lib/Dialect/Affine/IR/CMakeLists.txt
mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
mlir/lib/Dialect/VectorOps/VectorUtils.cpp
mlir/test/Dialect/Affine/canonicalize.mlir
mlir/test/Dialect/Affine/dma.mlir
mlir/test/Dialect/Affine/inlining.mlir
mlir/test/Dialect/Affine/invalid.mlir
mlir/test/Dialect/Affine/load-store-invalid.mlir
mlir/test/Dialect/Affine/load-store.mlir
mlir/test/Dialect/Affine/memref-stride-calculation.mlir
mlir/test/Dialect/Affine/ops.mlir
mlir/test/lib/Dialect/Affine/CMakeLists.txt
mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
Modified:
mlir/docs/Interfaces.md
mlir/docs/Tutorials/Toy/Ch-5.md
mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
mlir/include/mlir/Dialect/CMakeLists.txt
mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td
mlir/include/mlir/Dialect/Vector/VectorOps.td
mlir/include/mlir/InitAllDialects.h
mlir/include/mlir/InitAllPasses.h
mlir/include/mlir/Transforms/Passes.h
mlir/lib/Analysis/AffineAnalysis.cpp
mlir/lib/Analysis/AffineStructures.cpp
mlir/lib/Analysis/CMakeLists.txt
mlir/lib/Analysis/LoopAnalysis.cpp
mlir/lib/Analysis/NestedMatcher.cpp
mlir/lib/Analysis/SliceAnalysis.cpp
mlir/lib/Analysis/Utils.cpp
mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt
mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
mlir/lib/Dialect/CMakeLists.txt
mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt
mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt
mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopSpecialization.cpp
mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopTiling.cpp
mlir/lib/Dialect/Vector/CMakeLists.txt
mlir/lib/Dialect/Vector/VectorTransforms.cpp
mlir/lib/Dialect/Vector/VectorUtils.cpp
mlir/lib/Transforms/CMakeLists.txt
mlir/lib/Transforms/LoopFusion.cpp
mlir/lib/Transforms/LoopTiling.cpp
mlir/lib/Transforms/LoopUnroll.cpp
mlir/lib/Transforms/LoopUnrollAndJam.cpp
mlir/lib/Transforms/MemRefDataFlowOpt.cpp
mlir/lib/Transforms/PipelineDataTransfer.cpp
mlir/lib/Transforms/Utils/CMakeLists.txt
mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/lib/Transforms/Utils/Utils.cpp
mlir/lib/Transforms/Vectorize.cpp
mlir/test/EDSC/CMakeLists.txt
mlir/test/EDSC/builder-api-test.cpp
mlir/test/lib/Dialect/CMakeLists.txt
mlir/test/lib/Transforms/CMakeLists.txt
mlir/test/lib/Transforms/TestConstantFold.cpp
mlir/test/lib/Transforms/TestLoopFusion.cpp
mlir/test/lib/Transforms/TestMemRefBoundCheck.cpp
mlir/test/lib/Transforms/TestMemRefDependenceCheck.cpp
mlir/test/lib/Transforms/TestParallelismDetection.cpp
mlir/test/lib/Transforms/TestVectorizationUtils.cpp
mlir/tools/mlir-opt/CMakeLists.txt
Removed:
mlir/include/mlir/Dialect/AffineOps/AffineOps.h
mlir/include/mlir/Dialect/AffineOps/AffineOps.td
mlir/include/mlir/Dialect/AffineOps/AffineOpsBase.td
mlir/include/mlir/Dialect/AffineOps/AffineValueMap.h
mlir/include/mlir/Dialect/AffineOps/CMakeLists.txt
mlir/include/mlir/Dialect/AffineOps/EDSC/Builders.h
mlir/include/mlir/Dialect/AffineOps/EDSC/Intrinsics.h
mlir/lib/Dialect/AffineOps/AffineOps.cpp
mlir/lib/Dialect/AffineOps/AffineValueMap.cpp
mlir/lib/Dialect/AffineOps/CMakeLists.txt
mlir/lib/Dialect/AffineOps/EDSC/Builders.cpp
mlir/lib/Transforms/AffineDataCopyGeneration.cpp
mlir/lib/Transforms/AffineLoopInvariantCodeMotion.cpp
mlir/lib/Transforms/SimplifyAffineStructures.cpp
mlir/test/Dialect/AffineOps/canonicalize.mlir
mlir/test/Dialect/AffineOps/dma.mlir
mlir/test/Dialect/AffineOps/inlining.mlir
mlir/test/Dialect/AffineOps/invalid.mlir
mlir/test/Dialect/AffineOps/load-store-invalid.mlir
mlir/test/Dialect/AffineOps/load-store.mlir
mlir/test/Dialect/AffineOps/memref-stride-calculation.mlir
mlir/test/Dialect/AffineOps/ops.mlir
mlir/test/lib/Transforms/TestAffineDataCopy.cpp
################################################################################
diff --git a/mlir/docs/Interfaces.md b/mlir/docs/Interfaces.md
index f413cac28bb0..16422a104582 100644
--- a/mlir/docs/Interfaces.md
+++ b/mlir/docs/Interfaces.md
@@ -63,7 +63,7 @@ struct AffineInlinerInterface : public DialectInlinerInterface {
};
/// Register the interface with the dialect.
-AffineOpsDialect::AffineOpsDialect(MLIRContext *context) ... {
+AffineDialect::AffineDialect(MLIRContext *context) ... {
addInterfaces<AffineInlinerInterface>();
}
```
diff --git a/mlir/docs/Tutorials/Toy/Ch-5.md b/mlir/docs/Tutorials/Toy/Ch-5.md
index dbc545c49206..8f32a7289a61 100644
--- a/mlir/docs/Tutorials/Toy/Ch-5.md
+++ b/mlir/docs/Tutorials/Toy/Ch-5.md
@@ -62,7 +62,7 @@ void ToyToAffineLoweringPass::runOnFunction() {
// We define the specific operations, or dialects, that are legal targets for
// this lowering. In our case, we are lowering to a combination of the
// `Affine` and `Standard` dialects.
- target.addLegalDialect<mlir::AffineOpsDialect, mlir::StandardOpsDialect>();
+ target.addLegalDialect<mlir::AffineDialect, mlir::StandardOpsDialect>();
// We also define the Toy dialect as Illegal so that the conversion will fail
// if any of these operations are *not* converted. Given that we actually want
diff --git a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
index 9559402708c8..249f17b0fc71 100644
--- a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
@@ -15,7 +15,7 @@
#include "toy/Dialect.h"
#include "toy/Passes.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -280,7 +280,7 @@ void ToyToAffineLoweringPass::runOnFunction() {
// We define the specific operations, or dialects, that are legal targets for
// this lowering. In our case, we are lowering to a combination of the
// `Affine` and `Standard` dialects.
- target.addLegalDialect<AffineOpsDialect, StandardOpsDialect>();
+ target.addLegalDialect<AffineDialect, StandardOpsDialect>();
// We also define the Toy dialect as Illegal so that the conversion will fail
// if any of these operations are *not* converted. Given that we actually want
diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
index 9559402708c8..249f17b0fc71 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
@@ -15,7 +15,7 @@
#include "toy/Dialect.h"
#include "toy/Passes.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -280,7 +280,7 @@ void ToyToAffineLoweringPass::runOnFunction() {
// We define the specific operations, or dialects, that are legal targets for
// this lowering. In our case, we are lowering to a combination of the
// `Affine` and `Standard` dialects.
- target.addLegalDialect<AffineOpsDialect, StandardOpsDialect>();
+ target.addLegalDialect<AffineDialect, StandardOpsDialect>();
// We also define the Toy dialect as Illegal so that the conversion will fail
// if any of these operations are *not* converted. Given that we actually want
diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
index 5455738dff2a..f6dcba229276 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
@@ -19,7 +19,7 @@
#include "mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
diff --git a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
index 9559402708c8..249f17b0fc71 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
@@ -15,7 +15,7 @@
#include "toy/Dialect.h"
#include "toy/Passes.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -280,7 +280,7 @@ void ToyToAffineLoweringPass::runOnFunction() {
// We define the specific operations, or dialects, that are legal targets for
// this lowering. In our case, we are lowering to a combination of the
// `Affine` and `Standard` dialects.
- target.addLegalDialect<AffineOpsDialect, StandardOpsDialect>();
+ target.addLegalDialect<AffineDialect, StandardOpsDialect>();
// We also define the Toy dialect as Illegal so that the conversion will fail
// if any of these operations are *not* converted. Given that we actually want
diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
index 5455738dff2a..f6dcba229276 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
@@ -19,7 +19,7 @@
#include "mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
diff --git a/mlir/include/mlir/Dialect/Affine/CMakeLists.txt b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt
new file mode 100644
index 000000000000..f33061b2d87c
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(IR)
diff --git a/mlir/include/mlir/Dialect/AffineOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
similarity index 97%
rename from mlir/include/mlir/Dialect/AffineOps/EDSC/Builders.h
rename to mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
index f750a1d41f8a..9c320ece2209 100644
--- a/mlir/include/mlir/Dialect/AffineOps/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_AFFINEOPS_EDSC_BUILDERS_H_
-#define MLIR_DIALECT_AFFINEOPS_EDSC_BUILDERS_H_
+#ifndef MLIR_DIALECT_AFFINE_EDSC_BUILDERS_H_
+#define MLIR_DIALECT_AFFINE_EDSC_BUILDERS_H_
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Types.h"
@@ -203,4 +203,4 @@ ValueHandle TemplatedIndexedValue<Load, Store>::operator>=(ValueHandle e) {
} // namespace edsc
} // namespace mlir
-#endif // MLIR_DIALECT_AFFINEOPS_EDSC_BUILDERS_H_
+#endif // MLIR_DIALECT_AFFINE_EDSC_BUILDERS_H_
diff --git a/mlir/include/mlir/Dialect/AffineOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
similarity index 87%
rename from mlir/include/mlir/Dialect/AffineOps/EDSC/Intrinsics.h
rename to mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
index 67d4ac16bb0b..392e2433b992 100644
--- a/mlir/include/mlir/Dialect/AffineOps/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
@@ -5,10 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_AFFINEOPS_EDSC_INTRINSICS_H_
-#define MLIR_DIALECT_AFFINEOPS_EDSC_INTRINSICS_H_
+#ifndef MLIR_DIALECT_AFFINE_EDSC_INTRINSICS_H_
+#define MLIR_DIALECT_AFFINE_EDSC_INTRINSICS_H_
-#include "mlir/Dialect/AffineOps/EDSC/Builders.h"
+#include "mlir/Dialect/Affine/EDSC/Builders.h"
#include "mlir/EDSC/Intrinsics.h"
namespace mlir {
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
similarity index 99%
rename from mlir/include/mlir/Dialect/AffineOps/AffineOps.h
rename to mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index edae534f12ba..6ce38bcddddc 100644
--- a/mlir/include/mlir/Dialect/AffineOps/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H
-#define MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H
+#ifndef MLIR_DIALECT_AFFINE_IR_AFFINEOPS_H
+#define MLIR_DIALECT_AFFINE_IR_AFFINEOPS_H
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
@@ -493,10 +493,10 @@ AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
void fullyComposeAffineMapAndOperands(AffineMap *map,
SmallVectorImpl<Value> *operands);
-#include "mlir/Dialect/AffineOps/AffineOpsDialect.h.inc"
+#include "mlir/Dialect/Affine/IR/AffineOpsDialect.h.inc"
#define GET_OP_CLASSES
-#include "mlir/Dialect/AffineOps/AffineOps.h.inc"
+#include "mlir/Dialect/Affine/IR/AffineOps.h.inc"
/// Returns if the provided value is the induction variable of a AffineForOp.
bool isForInductionVar(Value val);
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
similarity index 99%
rename from mlir/include/mlir/Dialect/AffineOps/AffineOps.td
rename to mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
index 307860f1622b..6994b5f17661 100644
--- a/mlir/include/mlir/Dialect/AffineOps/AffineOps.td
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
@@ -13,11 +13,11 @@
#ifndef AFFINE_OPS
#define AFFINE_OPS
-include "mlir/Dialect/AffineOps/AffineOpsBase.td"
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/SideEffects.td"
-def AffineOps_Dialect : Dialect {
+def Affine_Dialect : Dialect {
let name = "affine";
let cppNamespace = "";
let hasConstantMaterializer = 1;
@@ -25,7 +25,7 @@ def AffineOps_Dialect : Dialect {
// Base class for Affine dialect ops.
class Affine_Op<string mnemonic, list<OpTrait> traits = []> :
- Op<AffineOps_Dialect, mnemonic, traits> {
+ Op<Affine_Dialect, mnemonic, traits> {
// For every affine op, there needs to be a:
// * void print(OpAsmPrinter &p, ${C++ class of Op} op)
// * LogicalResult verify(${C++ class of Op} op)
@@ -291,7 +291,7 @@ def AffineIfOp : Affine_Op<"if",
}
class AffineMinMaxOpBase<string mnemonic, list<OpTrait> traits = []> :
- Op<AffineOps_Dialect, mnemonic, traits> {
+ Op<Affine_Dialect, mnemonic, traits> {
let arguments = (ins AffineMapAttr:$map, Variadic<Index>:$operands);
let results = (outs Index);
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineOpsBase.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td
similarity index 100%
rename from mlir/include/mlir/Dialect/AffineOps/AffineOpsBase.td
rename to mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td
diff --git a/mlir/include/mlir/Dialect/AffineOps/AffineValueMap.h b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
similarity index 96%
rename from mlir/include/mlir/Dialect/AffineOps/AffineValueMap.h
rename to mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
index 3ec2b8559728..ffc6d73f3825 100644
--- a/mlir/include/mlir/Dialect/AffineOps/AffineValueMap.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
@@ -10,8 +10,8 @@
// analysis purposes.
//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_AFFINEOPS_AFFINEVALUEMAP_H
-#define MLIR_DIALECT_AFFINEOPS_AFFINEVALUEMAP_H
+#ifndef MLIR_DIALECT_AFFINE_IR_AFFINEVALUEMAP_H
+#define MLIR_DIALECT_AFFINE_IR_AFFINEVALUEMAP_H
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/OperationSupport.h"
@@ -87,4 +87,4 @@ class AffineValueMap {
} // namespace mlir
-#endif // MLIR_DIALECT_AFFINEOPS_AFFINEVALUEMAP_H
+#endif // MLIR_DIALECT_AFFINE_IR_AFFINEVALUEMAP_H
diff --git a/mlir/include/mlir/Dialect/AffineOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Affine/IR/CMakeLists.txt
similarity index 100%
rename from mlir/include/mlir/Dialect/AffineOps/CMakeLists.txt
rename to mlir/include/mlir/Dialect/Affine/IR/CMakeLists.txt
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
new file mode 100644
index 000000000000..343a0a60f118
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -0,0 +1,48 @@
+//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines a set of transforms specific to the AffineOps
+// dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_AFFINE_TRANSFORMS_PASSES_H
+#define MLIR_DIALECT_AFFINE_TRANSFORMS_PASSES_H
+
+#include "mlir/Support/LLVM.h"
+#include <functional>
+#include <limits>
+
+namespace mlir {
+
+class AffineForOp;
+class FuncOp;
+class ModuleOp;
+class Pass;
+template <typename T> class OpPassBase;
+
+/// Creates a simplification pass for affine structures (maps and sets). In
+/// addition, this pass also normalizes memrefs to have the trivial (identity)
+/// layout map.
+std::unique_ptr<OpPassBase<FuncOp>> createSimplifyAffineStructuresPass();
+
+/// Creates a loop invariant code motion pass that hoists loop invariant
+/// instructions out of affine loops.
+std::unique_ptr<OpPassBase<FuncOp>> createAffineLoopInvariantCodeMotionPass();
+
+/// Performs packing (or explicit copying) of accessed memref regions into
+/// buffers in the specified faster memory space through either pointwise copies
+/// or DMA operations.
+std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(
+ unsigned slowMemorySpace, unsigned fastMemorySpace,
+ unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
+ uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());
+
+} // end namespace mlir
+
+#endif // MLIR_DIALECT_AFFINE_TRANSFORMS_PASSES_H
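The entry points above are extracted from mlir/Transforms/Passes.h; their signatures are unchanged, only the header location moves. A hedged sketch of scheduling them on functions with the PassManager API of this era (the helper name is an assumption, not part of this commit):

```c++
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/Pass/PassManager.h"

// Nest a function pass manager and add the relocated affine passes; callers
// only need to switch from mlir/Transforms/Passes.h to the new header.
static void addAffineCleanup(mlir::PassManager &pm) {
  mlir::OpPassManager &funcPM = pm.nest<mlir::FuncOp>();
  funcPM.addPass(mlir::createAffineLoopInvariantCodeMotionPass());
  funcPM.addPass(mlir::createSimplifyAffineStructuresPass());
}
```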
diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 32b24264ba69..aabb2fdc6e83 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_subdirectory(AffineOps)
+add_subdirectory(Affine)
add_subdirectory(AVX512)
add_subdirectory(FxpMathOps)
add_subdirectory(GPU)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 90489299bafe..dc0c03f26cc8 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -13,7 +13,7 @@
#ifndef LINALG_OPS
#define LINALG_OPS
-include "mlir/Dialect/AffineOps/AffineOpsBase.td"
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
include "mlir/Dialect/Linalg/IR/LinalgBase.td"
include "mlir/Interfaces/SideEffects.td"
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index 457a8db7788f..5c8590fc6063 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -14,7 +14,7 @@
#ifndef LINALG_STRUCTURED_OPS
#define LINALG_STRUCTURED_OPS
-include "mlir/Dialect/AffineOps/AffineOpsBase.td"
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
include "mlir/Dialect/Linalg/IR/LinalgBase.td"
include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td"
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td
index c080ff2066d0..7fa33e4f2982 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td
@@ -15,7 +15,7 @@
include "mlir/Dialect/Linalg/IR/LinalgOps.td"
include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.td"
-include "mlir/Dialect/AffineOps/AffineOps.td"
+include "mlir/Dialect/Affine/IR/AffineOps.td"
def HasNoLinalgTransformMarker : CPred<[{
!op.getAttrOfType<StringAttr>(LinalgTransforms::kLinalgTransformMarker)
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
index 2a791365db4f..a3864614d05f 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -13,7 +13,7 @@
#ifndef VECTOR_OPS
#define VECTOR_OPS
-include "mlir/Dialect/AffineOps/AffineOpsBase.td"
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
include "mlir/Interfaces/SideEffects.td"
def Vector_Dialect : Dialect {
diff --git a/mlir/include/mlir/Dialect/VectorOps/VectorOps.td b/mlir/include/mlir/Dialect/VectorOps/VectorOps.td
new file mode 100644
index 000000000000..ef1fde00e5ec
--- /dev/null
+++ b/mlir/include/mlir/Dialect/VectorOps/VectorOps.td
@@ -0,0 +1,1402 @@
+//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines MLIR vector operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VECTOR_OPS
+#define VECTOR_OPS
+
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
+include "mlir/Interfaces/SideEffects.td"
+
+
+def VectorOps_Dialect : Dialect {
+ let name = "vector";
+ let cppNamespace = "vector";
+ let hasConstantMaterializer = 1;
+}
+
+// Base class for Vector dialect ops.
+class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
+ Op<VectorOps_Dialect, mnemonic, traits> {
+ // For every vector op, there needs to be a:
+ // * void print(OpAsmPrinter &p, ${C++ class of Op} op)
+ // * LogicalResult verify(${C++ class of Op} op)
+ // * ParseResult parse${C++ class of Op}(OpAsmParser &parser,
+ // OperationState &result)
+ // functions.
+ let printer = [{ return ::print(p, *this); }];
+ let verifier = [{ return ::verify(*this); }];
+ let parser = [{ return ::parse$cppClass(parser, result); }];
+}
+
+// TODO(andydavis, ntv) Add an attribute to specify a different algebra
+// with operators other than the current set: {*, +}.
+def Vector_ContractionOp :
+ Vector_Op<"contract", [NoSideEffect,
+ PredOpTrait<"first operand lhs and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>,
+ PredOpTrait<"second operand rhs and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 1>>,
+ PredOpTrait<"third operand acc and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 1>>]>,
+ Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc,
+ Variadic<VectorOf<[I1]>>:$masks,
+ AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types)>,
+ Results<(outs AnyType)> {
+ let summary = "vector contraction operation";
+ let description = [{
+ Computes the sum of products of vector elements along contracting
+ dimension pairs from 2 vectors of rank M and N respectively, adds this
+ intermediate result to the accumulator argument of rank K, and returns a
+ vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
+ num_batch_dims (see dimension type descriptions below)). For K = 0 (no
+ free or batch dimensions), the accumulator and output are scalars.
+
+ Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp)
+ specify the dynamic dimension sizes of valid data within the lhs/rhs vector
+ arguments.
+
+ An iterator type attribute list must be specified, where each element of
+ the list represents an iterator with one of the following types:
+
+ *) "reduction": reduction dimensions are present in the lhs and rhs
+ arguments but not in the output (and accumulator
+ argument). These are the dimensions along which the vector
+ contraction op computes the sum of products, and
+ contracting dimension pair dimension sizes must match
+ between lhs/rhs.
+ *) "parallel": Batch dimensions are iterator type "parallel", and
+ are non-contracting dimensions present in the lhs, rhs and
+ output. The lhs/rhs co-iterate along the batch dimensions,
+ which should be expressed in their indexing maps.
+
+ Free dimensions are iterator type "parallel", and are
+ non-contraction, non-batch dimensions accessed by either the
+ lhs or rhs (but not both). The lhs and rhs free dimensions
+ are unrelated to each other and do not co-iterate, which
+ should be expressed in their indexing maps.
+
+ An indexing map attribute list must be specified with an entry for lhs, rhs
+ and acc arguments. An indexing map attribute specifies a mapping from each
+ iterator in the iterator type list, to each dimension of an N-D vector.
+
+ Examples:
+
+ // Simple dot product (K = 0).
+ #contraction_accesses = [
+ affine_map<(i) -> (i)>,
+ affine_map<(i) -> (i)>,
+ affine_map<(i) -> ()>
+ ]
+ #contraction_trait = {
+ indexing_maps = #contraction_accesses,
+ iterator_types = ["reduction"]
+ }
+ %3 = vector.contract #contraction_trait %0, %1, %2
+ : vector<10xf32>, vector<10xf32> into f32
+
+ // 2D vector contraction with one contracting dimension (matmul, K = 2).
+ #contraction_accesses = [
+ affine_map<(i, j, k) -> (i, k)>,
+ affine_map<(i, j, k) -> (k, j)>,
+ affine_map<(i, j, k) -> (i, j)>
+ ]
+ #contraction_trait = {
+ indexing_maps = #contraction_accesses,
+ iterator_types = ["parallel", "parallel", "reduction"]
+ }
+
+ %3 = vector.contract #contraction_trait %0, %1, %2
+ : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>
+
+ // 4D to 3D vector contraction with two contracting dimensions and
+ // one batch dimension (K = 3).
+ #contraction_accesses = [
+ affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
+ affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
+ affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
+ ]
+ #contraction_trait = {
+ indexing_maps = #contraction_accesses,
+ iterator_types = ["parallel", "parallel", "parallel",
+ "reduction", "reduction"]
+ }
+
+ %4 = vector.contract #contraction_trait %0, %1, %2
+ : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
+
+ // 4D vector contraction with two contracting dimensions and optional
+ // vector mask arguments.
+ %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
+ %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>
+
+ %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
+ : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32>
+ }];
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value lhs, Value rhs, "
+ "Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes">,
+ OpBuilder<
+ "Builder *builder, OperationState &result, Value lhs, Value rhs, "
+ "Value acc, ArrayRef<ArrayRef<AffineExpr>> indexingExprs, "
+ "ArrayRef<StringRef> iteratorTypes">];
+ let extraClassDeclaration = [{
+ VectorType getLhsType() {
+ return lhs().getType().cast<VectorType>();
+ }
+ VectorType getRhsType() {
+ return rhs().getType().cast<VectorType>();
+ }
+ Type getAccType() { return acc().getType(); }
+ VectorType getLHSVectorMaskType() {
+ if (llvm::size(masks()) != 2) return VectorType();
+ return getOperand(3).getType().cast<VectorType>();
+ }
+ VectorType getRHSVectorMaskType() {
+ if (llvm::size(masks()) != 2) return VectorType();
+ return getOperand(4).getType().cast<VectorType>();
+ }
+ Type getResultType() { return getResult().getType(); }
+ ArrayRef<StringRef> getTraitAttrNames();
+ SmallVector<AffineMap, 4> getIndexingMaps();
+ static unsigned getAccOperandIndex() { return 2; }
+
+ // Returns the bounds of each dimension in the iteration space spanned
+ // by the iterator types of this operation.
+ void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);
+
+ // Returns a list of index maps, where there is a list entry for each
+ // op indexing map attribute (i.e. one for each input and output, with
+ // the output listed last). Each index map maps from this operation's
+ // iteration space to the vector dimensions of the map's input/output.
+ void getIterationIndexMap(
+ std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);
+
+ std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
+ std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();
+ }];
+}
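As a worked complement to the matmul example in the description above, a hypothetical C++ construction through the first builder overload declared here might look as follows (the helper and all value names are assumptions for illustration):

```c++
#include "mlir/Dialect/Vector/VectorOps.h" // vector dialect ops as of this commit
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
using namespace mlir;

// Build the 2-D matmul contraction (i, j, k) from the description; lhs, rhs
// and acc are assumed to already hold vector<4x3xf32>, vector<3x7xf32> and
// vector<4x7xf32> values respectively.
static Value buildMatmulContract(OpBuilder &b, Location loc, Value lhs,
                                 Value rhs, Value acc) {
  MLIRContext *ctx = b.getContext();
  AffineExpr i = getAffineDimExpr(0, ctx), j = getAffineDimExpr(1, ctx),
             k = getAffineDimExpr(2, ctx);
  ArrayAttr maps = b.getAffineMapArrayAttr(
      {AffineMap::get(3, 0, {i, k}),    // lhs: (i, j, k) -> (i, k)
       AffineMap::get(3, 0, {k, j}),    // rhs: (i, j, k) -> (k, j)
       AffineMap::get(3, 0, {i, j})});  // acc: (i, j, k) -> (i, j)
  ArrayAttr iters = b.getStrArrayAttr({"parallel", "parallel", "reduction"});
  return b.create<vector::ContractionOp>(loc, lhs, rhs, acc, maps, iters);
}
```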
+
+def Vector_ReductionOp :
+ Vector_Op<"reduction", [NoSideEffect,
+ PredOpTrait<"source operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>]>,
+ Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>,
+ Results<(outs AnyType:$dest)> {
+ let summary = "reduction operation";
+ let description = [{
+ Reduces a 1-D vector "horizontally" into a scalar using the given
+ operation (add/mul/min/max for int/fp and and/or/xor for int only).
+ Some reductions (add/mul for fp) also allow an optional fused
+ accumulator.
+
+ Note that these operations are restricted to 1-D vectors to remain
+ close to the corresponding LLVM intrinsics:
+
+ http://llvm.org/docs/LangRef.html#experimental-vector-reduction-intrinsics
+
+ Examples:
+ ```
+ %1 = vector.reduction "add", %0 : vector<16xf32> into f32
+
+ %3 = vector.reduction "xor", %2 : vector<4xi32> into i32
+
+ %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
+ ```
+ }];
+ let extraClassDeclaration = [{
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_BroadcastOp :
+ Vector_Op<"broadcast", [NoSideEffect,
+ PredOpTrait<"source operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>]>,
+ Arguments<(ins AnyType:$source)>,
+ Results<(outs AnyVector:$vector)> {
+ let summary = "broadcast operation";
+ let description = [{
+ Broadcasts the scalar or k-D vector value in the source operand
+ to an n-D result vector such that the broadcast makes sense, i.e.,
+ the source operand is duplicated to match the given rank and sizes
+ in the result vector. The legality rules are:
+ * the source operand must have the same element type as the result type
+ * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
+ a n-D vector <t_1 x .. x t_n x type> if
+ * k <= n, and
+ * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
+ match exactly as s_j = t_i or s_j = 1:
+ ```
+ t_1 x .. t_n-k x t_n-k+1 x .. x t_i x .. x t_n
+ s_1 x .. x s_j x .. x s_k
+ <duplication> <potential stretch>
+ ```
+ The source operand is duplicated over all the missing leading dimensions
+ and stretched over the trailing dimensions where the source has a non-equal
+ dimension of 1. These rules imply that any scalar broadcast (k=0) to any
+ shaped vector with the same element type is always legal.
+
+ Examples:
+ ```
+ %0 = constant 0.0 : f32
+ %1 = vector.broadcast %0 : f32 to vector<16xf32>
+ %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
+ ```
+ }];
+ let extraClassDeclaration = [{
+ Type getSourceType() { return source().getType(); }
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+ let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
+}
+
+def Vector_ShuffleOp :
+ Vector_Op<"shuffle", [NoSideEffect,
+ PredOpTrait<"first operand v1 and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>,
+ PredOpTrait<"second operand v2 and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 1>>]>,
+ Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>,
+ Results<(outs AnyVector:$vector)> {
+ let summary = "shuffle operation";
+ let description = [{
+ The shuffle operation constructs a permutation (or duplication) of elements
+ from two input vectors, returning a vector with the same element type as
+ the input and a length that is the same as the shuffle mask. The two input
+ vectors must have the same element type, rank, and trailing dimension sizes
+ and shuffles their values in the leading dimension (which may differ in size)
+ according to the given mask. The legality rules are:
+ * the two operands must have the same element type as the result
+ * the two operands and the result must have the same rank and trailing
+ dimension sizes, viz. given two k-D operands
+ v1 : <s_1 x s_2 x .. x s_k x type> and
+ v2 : <t_1 x t_2 x .. x t_k x type>
+ we have s_i = t_i for all 1 < i <= k
+ * the mask length equals the leading dimension size of the result
+ * numbering the input vector indices left to right across the operands, all
+ mask values must be within range, viz. given two k-D operands v1 and v2
+ above, all mask values are in the range [0,s_1+t_1)
+
+ Examples:
+ ```
+ %0 = vector.shuffle %a, %b[0, 3]
+ : vector<2xf32>, vector<2xf32> ; yields vector<2xf32>
+ %1 = vector.shuffle %c, %b[0, 1, 2]
+ : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
+ %2 = vector.shuffle %a, %b[3, 2, 1, 0]
+ : vector<2xf32>, vector<2xf32> ; yields vector<4xf32>
+
+ ```
+ }];
+ let builders = [OpBuilder<"Builder *builder, OperationState &result,"
+ "Value v1, Value v2, ArrayRef<int64_t>">];
+ let extraClassDeclaration = [{
+ static StringRef getMaskAttrName() { return "mask"; }
+ VectorType getV1VectorType() {
+ return v1().getType().cast<VectorType>();
+ }
+ VectorType getV2VectorType() {
+ return v2().getType().cast<VectorType>();
+ }
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_ExtractElementOp :
+ Vector_Op<"extractelement", [NoSideEffect,
+ TypesMatchWith<"result type matches element type of vector operand",
+ "vector", "result",
+ "$_self.cast<ShapedType>().getElementType()">]>,
+ Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>,
+ Results<(outs AnyType:$result)> {
+ let summary = "extractelement operation";
+ let description = [{
+ Takes a 1-D vector and a dynamic index position and extracts the
+ scalar at that position. Note that this instruction resembles
+ vector.extract, but is restricted to 1-D vectors and relaxed
+ to dynamic indices. It is meant to be closer to LLVM's version:
+ https://llvm.org/docs/LangRef.html#extractelement-instruction
+
+ Example:
+ ```
+ %c = constant 15 : i32
+ %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
+ ```
+ }];
+ let extraClassDeclaration = [{
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+
+ let assemblyFormat = [{
+ $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector)
+ }];
+}
+
+def Vector_ExtractOp :
+ Vector_Op<"extract", [NoSideEffect,
+ PredOpTrait<"operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>]>,
+ Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>,
+ Results<(outs AnyType)> {
+ let summary = "extract operation";
+ let description = [{
+ Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
+ the proper position. Degenerates to an element type in the 0-D case.
+
+ Examples:
+ ```
+ %1 = vector.extract %0[3]: vector<4x8x16xf32>
+ %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
+ ```
+ }];
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value source,"
+ "ArrayRef<int64_t>">];
+ let extraClassDeclaration = [{
+ static StringRef getPositionAttrName() { return "position"; }
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_ExtractSlicesOp :
+ Vector_Op<"extract_slices", [NoSideEffect]>,
+ Arguments<(ins AnyVector:$vector, I64ArrayAttr:$sizes,
+ I64ArrayAttr:$strides)>,
+ Results<(outs TupleOf<[AnyVector]>)> {
+ let summary = "vector extract slices operation";
+ let description = [{
+ Takes an N-d vector and returns a tuple of vector slices of 'vector',
+ based on 'sizes' and 'strides' parameters.
+
+ The arguments 'sizes' and 'strides' represent a specification for
+ generating the unrolling of 'vector' shape, which has all slices of shape
+ 'sizes' except for slices at dimension boundaries when 'vector' dimension
+ sizes are not a multiple of 'sizes'.
+
+ Each slice is returned at the tuple element index corresponding to the
+ linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
+ Currently, only unit strides are supported.
+
+ Examples:
+ ```
+ %0 = vector.transfer_read ...: vector<4x2xf32>
+
+ %1 = vector.extract_slices %0, [2, 2], [1, 1]
+ : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
+
+ // Example with partial slices at dimension boundaries.
+ %2 = vector.transfer_read ...: vector<4x3xf32>
+
+ %3 = vector.extract_slices %2, [2, 2], [1, 1]
+ : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
+ vector<2x2xf32>, vector<2x1xf32>>
+ ```
+ }];
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, TupleType tupleType, " #
+ "Value vector, ArrayRef<int64_t> sizes, " #
+ "ArrayRef<int64_t> strides">];
+ let extraClassDeclaration = [{
+ VectorType getSourceVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ TupleType getResultTupleType() {
+ return getResult().getType().cast<TupleType>();
+ }
+ void getSizes(SmallVectorImpl<int64_t> &results);
+ void getStrides(SmallVectorImpl<int64_t> &results);
+ static StringRef getSizesAttrName() { return "sizes"; }
+ static StringRef getStridesAttrName() { return "strides"; }
+ }];
+ let assemblyFormat = [{
+ $vector `,` $sizes `,` $strides attr-dict `:` type($vector) `into`
+ type(results)
+ }];
+}
+
+def Vector_FMAOp :
+ Op<VectorOps_Dialect, "fma", [NoSideEffect,
+ AllTypesMatch<["lhs", "rhs", "acc", "result"]>]>,
+ Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>,
+ Results<(outs AnyVector:$result)> {
+ let summary = "vector fused multiply-add";
+ let description = [{
+ Multiply-add expressions operate on n-D vectors and compute a fused
+ pointwise multiply-and-accumulate: `$result = $lhs * $rhs + $acc`.
+ All operands and result have the same vector type. The semantics
+ of the operation correspond to those of the `llvm.fma`
+ [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
+ particular case of lowering to LLVM, this is guaranteed to lower
+ to the `llvm.fma.*` intrinsic.
+
+ Example:
+
+ ```
+ %3 = vector.fma %0, %1, %2: vector<8x16xf32>
+ ```
+ }];
+ // Fully specified by traits.
+ let verifier = ?;
+ let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
+ let builders = [OpBuilder<
+ "Builder *b, OperationState &result, Value lhs, Value rhs, Value acc",
+ "build(b, result, lhs.getType(), lhs, rhs, acc);">];
+ let extraClassDeclaration = [{
+ VectorType getVectorType() { return lhs().getType().cast<VectorType>(); }
+ }];
+}
+
+def Vector_InsertElementOp :
+ Vector_Op<"insertelement", [NoSideEffect,
+ TypesMatchWith<"source operand type matches element type of result",
+ "result", "source",
+ "$_self.cast<ShapedType>().getElementType()">,
+ AllTypesMatch<["dest", "result"]>]>,
+ Arguments<(ins AnyType:$source, AnyVector:$dest,
+ AnySignlessInteger:$position)>,
+ Results<(outs AnyVector:$result)> {
+ let summary = "insertelement operation";
+ let description = [{
+ Takes a scalar source, a 1-D destination vector and a dynamic index
+ position and inserts the source into the destination at the proper
+ position. Note that this instruction resembles vector.insert, but
+ is restricted to 1-D vectors and relaxed to dynamic indices. It is
+ meant to be closer to LLVM's version:
+ https://llvm.org/docs/LangRef.html#insertelement-instruction
+
+ Example:
+ ```
+ %c = constant 15 : i32
+ %f = constant 0.0f : f32
+ %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
+ ```
+ }];
+ let extraClassDeclaration = [{
+ Type getSourceType() { return source().getType(); }
+ VectorType getDestVectorType() {
+ return dest().getType().cast<VectorType>();
+ }
+ }];
+
+ let assemblyFormat = [{
+ $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:`
+ type($result)
+ }];
+}
+
+def Vector_InsertOp :
+ Vector_Op<"insert", [NoSideEffect,
+ PredOpTrait<"source operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>,
+ AllTypesMatch<["dest", "res"]>]>,
+ Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>,
+ Results<(outs AnyVector:$res)> {
+ let summary = "insert operation";
+ let description = [{
+ Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
+ and inserts the n-D source into the (n+k)-D destination at the proper
+ position. Degenerates to a scalar source type when n = 0.
+
+ Examples:
+ ```
+ %2 = vector.insert %0, %1[3]:
+ vector<8x16xf32> into vector<4x8x16xf32>
+ %5 = vector.insert %3, %4[3, 3, 3]:
+ f32 into vector<4x8x16xf32>
+ ```
+ }];
+ let assemblyFormat = [{
+ $source `,` $dest $position attr-dict `:` type($source) `into` type($dest)
+ }];
+
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value source, " #
+ "Value dest, ArrayRef<int64_t>">];
+ let extraClassDeclaration = [{
+ static StringRef getPositionAttrName() { return "position"; }
+ Type getSourceType() { return source().getType(); }
+ VectorType getDestVectorType() {
+ return dest().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_InsertSlicesOp :
+ Vector_Op<"insert_slices", [NoSideEffect]>,
+ Arguments<(ins TupleOf<[AnyVector]>:$vectors, I64ArrayAttr:$sizes,
+ I64ArrayAttr:$strides)>,
+ Results<(outs AnyVector)> {
+ let summary = "vector insert slices operation";
+ let description = [{
+ Takes a tuple of vector slices and inserts them into the vector result
+ according to the 'sizes' and 'strides' parameters.
+
+ The arguments 'sizes' and 'strides' represent a specification for
+ generating the unrolling of 'vector' shape, which has all slices of shape
+ 'sizes' except for slices at dimension boundaries when 'vector' dimension
+ sizes are not a multiple of 'sizes'.
+
+ Each slice in 'vectors' is at the tuple element index corresponding to the
+ linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
+ Currently, only unit strides are supported.
+
+ Examples:
+ ```
+ %0 = vector.extract_slices %0, [2, 2], [1, 1]
+ : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
+
+ %1 = vector.insert_slices %0, [2, 2], [1, 1]
+ : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32>
+
+ // Example with partial slices at dimension boundaries.
+ %3 = vector.extract_slices %2, [2, 2], [1, 1]
+ : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
+ vector<2x2xf32>, vector<2x1xf32>>
+
+ %4 = vector.insert_slices %3, [2, 2], [1, 1]
+ : tuple<vector<2x2xf32>, vector<2x1xf32>,
+ vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32>
+ ```
+ }];
+
+ let extraClassDeclaration = [{
+ TupleType getSourceTupleType() {
+ return vectors().getType().cast<TupleType>();
+ }
+ VectorType getResultVectorType() {
+ return getResult().getType().cast<VectorType>();
+ }
+ void getSizes(SmallVectorImpl<int64_t> &results);
+ void getStrides(SmallVectorImpl<int64_t> &results);
+ static StringRef getSizesAttrName() { return "sizes"; }
+ static StringRef getStridesAttrName() { return "strides"; }
+ }];
+ let assemblyFormat = [{
+ $vectors `,` $sizes `,` $strides attr-dict `:` type($vectors) `into`
+ type(results)
+ }];
+}
+
+def Vector_InsertStridedSliceOp :
+ Vector_Op<"insert_strided_slice", [NoSideEffect,
+ PredOpTrait<"operand #0 and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>,
+ AllTypesMatch<["dest", "res"]>]>,
+ Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets,
+ I64ArrayAttr:$strides)>,
+ Results<(outs AnyVector:$res)> {
+ let summary = "strided_slice operation";
+ let description = [{
+ Takes a k-D source vector, an n-D destination vector (n >= k), an n-sized
+ `offsets` integer array attribute, a k-sized `strides` integer array attribute
+ and inserts the k-D source vector as a strided subvector at the proper offset
+ into the n-D destination vector.
+
+ At the moment strides must contain only 1s.
+
+ Returns an n-D vector that is a copy of the n-D destination vector in which
+ the last k-D dimensions contain the k-D source vector elements strided at
+ the proper location as specified by the offsets.
+
+ Examples:
+ ```
+ %2 = vector.insert_strided_slice %0, %1
+ {offsets = [0, 0, 2], strides = [1, 1]}:
+ vector<2x4xf32> into vector<16x4x8xf32>
+ ```
+ }];
+
+ let assemblyFormat = [{
+ $source `,` $dest attr-dict `:` type($source) `into` type($dest)
+ }];
+
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value source, Value dest, " #
+ "ArrayRef<int64_t> offsets, ArrayRef<int64_t> strides">];
+ let extraClassDeclaration = [{
+ static StringRef getOffsetsAttrName() { return "offsets"; }
+ static StringRef getStridesAttrName() { return "strides"; }
+ VectorType getSourceVectorType() {
+ return source().getType().cast<VectorType>();
+ }
+ VectorType getDestVectorType() {
+ return dest().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_OuterProductOp :
+ Vector_Op<"outerproduct", [NoSideEffect, SameOperandsAndResultElementType]>,
+ Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, Variadic<AnyVector>:$acc)>,
+ Results<(outs AnyVector)> {
+ let summary = "vector outerproduct with optional fused add";
+ let description = [{
+ Takes two 1-D vectors and returns the 2-D vector containing the outer-product.
+
+ An optional extra 2-D vector argument may be specified in which case the
+ operation returns the sum of the outer-product and the extra vector. In this
+ multiply-accumulate scenario, the rounding mode is that obtained by
+ guaranteeing that a fused-multiply add operation is emitted. When lowered to
+ the LLVMIR dialect, this form emits `llvm.intr.fma`, which is guaranteed to
+ lower to actual `fma` instructions on x86.
+
+ Examples:
+
+ ```
+ %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
+ return %2: vector<4x8xf32>
+
+ %3 = vector.outerproduct %0, %1, %2:
+ vector<4xf32>, vector<8xf32>, vector<4x8xf32>
+ return %3: vector<4x8xf32>
+ ```
+ }];
+ let extraClassDeclaration = [{
+ VectorType getOperandVectorTypeLHS() {
+ return lhs().getType().cast<VectorType>();
+ }
+ VectorType getOperandVectorTypeRHS() {
+ return rhs().getType().cast<VectorType>();
+ }
+ VectorType getOperandVectorTypeACC() {
+ return (llvm::size(acc()) == 0) ? VectorType() :
+ (*acc().begin()).getType().cast<VectorType>();
+ }
+ VectorType getVectorType() {
+ return getResult().getType().cast<VectorType>();
+ }
+ }];
+}
+
+// TODO(andydavis) Add transformation which decomposes ReshapeOp into an
+// optimized sequence of vector rotate/shuffle/select operations.
+def Vector_ReshapeOp :
+ Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>,
+ Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape,
+ Variadic<Index>:$output_shape,
+ I64ArrayAttr:$fixed_vector_sizes)>,
+ Results<(outs AnyVector:$result)> {
+ let summary = "vector reshape operation";
+ let description = [{
+ Reshapes its vector operand from 'input_shape' to 'output_shape' maintaining
+ fixed vector dimension 'fixed_vector_sizes' on the innermost vector
+ dimensions.
+
+ The parameters 'input_shape' and 'output_shape' represent valid data shapes
+ across fixed vector shapes. For example, if a vector has a valid data
+ shape [6] with fixed vector size [8], then the valid data elements are
+ assumed to be stored at the beginning of the vector with the remaining
+ vector elements undefined.
+
+ In the examples below, valid data elements are represented by an alphabetic
+ character, and undefined data elements are represented by '-'.
+
+ Example
+
+ vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]
+
+ input: [a, b, c, d, e, f]
+
+ layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
+
+ vector layout: [a, b, c, d, e, f, -, -]
+
+ Example
+
+ vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]
+
+ input: [a, b, c, d, e, f, g, h, i, j]
+
+ layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
+
+ vector layout: [[a, b, c, d, e, f, g, h],
+ [i, j, -, -, -, -, -, -]]
+
+ Example
+
+ vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
+ [2, 3]
+
+ input: [[a, b, c, d, e],
+ [f, g, h, i, j],
+ [k, l, m, n, o]]
+
+ layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5,
+ d0 mod 3, d1 mod 5)
+
+ vector layout: [[[[a, b, c],
+ [f, g, h]]
+ [[d, e, -],
+ [i, j, -]]],
+ [[[k, l, m],
+ [-, -, -]]
+ [[n, o, -],
+ [-, -, -]]]]
+
+ Example
+
+ %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
+ : vector<3x2x4xf32> to vector<2x3x4xf32>
+
+ input: [[a, b, c, d, e, f],
+ [g, h, i, j, k, l],
+ [m, n, o, p, q, r]]
+
+ layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4)
+
+
+ Input vector: [[[a, b, c, d],
+ [e, f, -, -]],
+ [[g, h, i, j],
+ [k, l, -, -]],
+ [[m, n, o, p],
+ [q, r, -, -]]]
+
+ Output vector: [[[a, b, c, d],
+ [e, f, g, h],
+ [i, -, -, -]],
+ [[j, k, l, m],
+ [n, o, p, q],
+ [r, -, -, -]]]
+ }];
+
+ let extraClassDeclaration = [{
+ VectorType getInputVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ VectorType getOutputVectorType() {
+ return getResult().getType().cast<VectorType>();
+ }
+
+ /// Returns as integer value the number of input shape operands.
+ int64_t getNumInputShapeSizes() { return input_shape().size(); }
+
+ /// Returns as integer value the number of output shape operands.
+ int64_t getNumOutputShapeSizes() { return output_shape().size(); }
+
+ void getFixedVectorSizes(SmallVectorImpl<int64_t> &results);
+
+ static StringRef getFixedVectorSizesAttrName() {
+ return "fixed_vector_sizes";
+ }
+ static StringRef getInputShapeAttrName() { return "input_shape"; }
+ static StringRef getOutputShapeAttrName() { return "output_shape"; }
+ }];
+
+ let assemblyFormat = [{
+ $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,`
+ $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result)
+ }];
+}
+
+def Vector_StridedSliceOp :
+ Vector_Op<"strided_slice", [NoSideEffect,
+ PredOpTrait<"operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>]>,
+ Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets,
+ I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
+ Results<(outs AnyVector)> {
+ let summary = "strided_slice operation";
+ let description = [{
+ Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized
+ `sizes` integer array attribute, a k-sized `strides` integer array
+ attribute and extracts the n-D subvector at the proper offset.
+
+ At the moment strides must contain only 1s.
+ // TODO(ntv) support non-1 strides.
+
+ Returns an n-D vector where the first k-D dimensions match the `sizes`
+ attribute. The returned subvector contains the elements starting at offset
+ `offsets` and ending at `offsets + sizes`.
+
+ Examples:
+ ```
+ %1 = vector.strided_slice %0
+ {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
+ vector<4x8x16xf32> to vector<2x4x16xf32>
+ ```
+
+ // TODO(ntv) Evolve to a range form syntax similar to:
+ %1 = vector.strided_slice %0[0:2:1][2:4:1]
+ vector<4x8x16xf32> to vector<2x4x16xf32>
+ }];
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value source, " #
+ "ArrayRef<int64_t> offsets, ArrayRef<int64_t> sizes, " #
+ "ArrayRef<int64_t> strides">];
+ let extraClassDeclaration = [{
+ static StringRef getOffsetsAttrName() { return "offsets"; }
+ static StringRef getSizesAttrName() { return "sizes"; }
+ static StringRef getStridesAttrName() { return "strides"; }
+ VectorType getVectorType(){ return vector().getType().cast<VectorType>(); }
+ void getOffsets(SmallVectorImpl<int64_t> &results);
+ }];
+ let hasCanonicalizer = 1;
+ let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
+}
+
+def Vector_TransferReadOp :
+ Vector_Op<"transfer_read">,
+ Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices,
+ AffineMapAttr:$permutation_map, AnyType:$padding)>,
+ Results<(outs AnyVector:$vector)> {
+
+ let summary = "Reads a supervector from memory into an SSA vector value.";
+
+ let description = [{
+ The `vector.transfer_read` op performs a blocking read from a slice within
+ a [MemRef](../LangRef.md#memref-type) supplied as its first operand
+ into a [vector](../LangRef.md#vector-type) of the same base elemental type.
+
+ A memref operand with vector element type must have its vector element
+ type match a suffix (shape and element type) of the vector (e.g.
+ memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).
+
+ The slice is further defined by a full-rank index within the MemRef,
+ supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map
+ [attribute](../LangRef.md#attributes) is an
+ [affine-map](Affine.md#affine-maps) which specifies the transposition on the
+ slice to match the vector shape. The size of the slice is specified by the
+ size of the vector, given as the return type. An `ssa-value` of the same
+ elemental type as the MemRef is provided as the last operand to specify
+ padding in the case of out-of-bounds accesses. This operation is called
+ 'read', as opposed to 'load', because the super-vector granularity is
+ generally not representable with a single hardware register.
+ A `vector.transfer_read` is thus a mid-level
+ abstraction that supports super-vectorization with non-effecting padding for
+ full-tile-only code.
+
+ More precisely, let's dive deeper into the permutation_map for the following
+ MLIR:
+
+ ```mlir
+ vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
+ { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
+ memref<?x?x?x?xf32>, vector<3x4x5xf32>
+ ```
+
+ This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
+ %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
+ is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`
+
+ That slice needs to be read into a `vector<3x4x5xf32>`. Since the
+ permutation map is not full rank, there must be a broadcast along vector
+ dimension `1`.
+
+ A notional lowering of vector.transfer_read could generate code resembling:
+
+ ```mlir
+ // %expr1, %expr2, %expr3, %expr4 defined before this point
+ %tmp = alloc() : vector<3x4x5xf32>
+ %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
+ for %i = 0 to 3 {
+ affine.for %j = 0 to 4 {
+ affine.for %k = 0 to 5 {
+ %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
+ memref<?x?x?x?xf32>
+        store %a, %tmp[%i, %j, %k] : vector<3x4x5xf32>
+ }}}
+ %c0 = constant 0 : index
+ %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
+ ```
+
+    On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice
+    that only `3 * 5` distinct values are read from `%A`, yet `3 * 4 * 5`
+    values are actually transferred between `%A` and `%tmp`.
+
+ Alternatively, if a notional vector broadcast operation were available, the
+ lowered code would resemble:
+
+ ```mlir
+ // %expr1, %expr2, %expr3, %expr4 defined before this point
+ %tmp = alloc() : vector<3x4x5xf32>
+ %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
+ for %i = 0 to 3 {
+ affine.for %k = 0 to 5 {
+ %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
+ memref<?x?x?x?xf32>
+        store %a, %tmp[%i, 0, %k] : vector<3x4x5xf32>
+ }}
+ %c0 = constant 0 : index
+ %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
+ %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
+ ```
+
+    where `broadcast` broadcasts from element 0 to all others along the
+    specified dimension. This time, only `3 * 5` values are written to
+    temporary storage, matching the `3 * 5` values transferred. An additional
+    broadcast along dimension `1` is required. On a GPU this broadcast could
+    be implemented using a warp-shuffle if loop `j` were mapped to
+    `threadIdx.x`.
+
+    Syntax:
+    ```
+    operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
+                  `{` attribute-entry `}` `:` memref-type `,` vector-type
+    ```
+
+ Examples:
+
+ ```mlir
+ // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
+ // and pad with %f0 to handle the boundary case:
+      %f0 = constant 0.0 : f32
+ for %i0 = 0 to %0 {
+ affine.for %i1 = 0 to %1 step 256 {
+ affine.for %i2 = 0 to %2 step 32 {
+          %v = vector.transfer_read %A[%i0, %i1, %i2], %f0
+ {permutation_map: (d0, d1, d2) -> (d2, d1)} :
+ memref<?x?x?xf32>, vector<32x256xf32>
+ }}}
+
+ // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
+ // vector<128xf32>. The underlying implementation will require a 1-D vector
+ // broadcast:
+ for %i0 = 0 to %0 {
+ affine.for %i1 = 0 to %1 {
+ %3 = vector.transfer_read %A[%i0, %i1]
+ {permutation_map: (d0, d1) -> (0)} :
+ memref<?x?xf32>, vector<128xf32>
+ }
+ }
+
+ // Read from a memref with vector element type.
+ %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
+ {permutation_map = (d0, d1)->(d0, d1)}
+ : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
+ ```
+ }];
+
+ let extraClassDeclaration = [{
+ MemRefType getMemRefType() {
+ return memref().getType().cast<MemRefType>();
+ }
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ }];
+}
+
+def Vector_TransferWriteOp :
+ Vector_Op<"transfer_write">,
+ Arguments<(ins AnyVector:$vector, AnyMemRef:$memref,
+ Variadic<Index>:$indices,
+ AffineMapAttr:$permutation_map)> {
+
+ let summary = "The vector.transfer_write op writes a supervector to memory.";
+
+ let description = [{
+    The `vector.transfer_write` op performs a blocking write from a
+ [vector](../LangRef.md#vector-type), supplied as its first operand, into a
+ slice within a [MemRef](../LangRef.md#memref-type) of the same base
+ elemental type, supplied as its second operand.
+
+    A memref operand with vector element type must have its vector element
+    type match a suffix (shape and element type) of the vector (e.g.
+    memref<3x2x6xvector<4x3xf32>>, vector<1x1x4x3xf32>).
+
+ The slice is further defined by a full-rank index within the MemRef,
+ supplied as the operands `3 .. 2 + rank(memref)`.
+ The permutation_map [attribute](../LangRef.md#attributes) is an
+ [affine-map](Affine.md#affine-maps) which specifies the transposition on the
+ slice to match the vector shape. The size of the slice is specified by the
+    size of the vector. This operation is called 'write' as opposed to
+    'store' because the super-vector granularity is generally not
+    representable with a single hardware register. A `vector.transfer_write`
+    is thus a
+ mid-level abstraction that supports super-vectorization with non-effecting
+ padding for full-tile-only code. It is the responsibility of
+ `vector.transfer_write`'s implementation to ensure the memory writes are
+ valid. Different lowerings may be pertinent depending on the hardware
+ support.
+
+ Syntax:
+
+ ```
+    operation ::= `vector.transfer_write` ssa-use-list `{` attribute-entry
+                  `}` `:` vector-type `,` memref-type
+ ```
+
+ Examples:
+
+ ```mlir
+    // Write vector<16x32x64xf32> into the slice
+ // `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
+ for %i0 = 0 to %0 {
+ affine.for %i1 = 0 to %1 step 32 {
+ affine.for %i2 = 0 to %2 step 64 {
+ affine.for %i3 = 0 to %3 step 16 {
+ %val = `ssa-value` : vector<16x32x64xf32>
+ vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
+ {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
+ vector<16x32x64xf32>, memref<?x?x?x?xf32>
+ }}}}
+
+    // Write to a memref with vector element type.
+ vector.transfer_write %4, %arg1[%c3, %c3]
+ {permutation_map = (d0, d1)->(d0, d1)}
+ : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
+ ```
+ }];
+
+ let extraClassDeclaration = [{
+ VectorType getVectorType() {
+ return vector().getType().cast<VectorType>();
+ }
+ MemRefType getMemRefType() {
+ return memref().getType().cast<MemRefType>();
+ }
+ }];
+ let assemblyFormat = [{
+ $vector `,` $memref `[` $indices `]` attr-dict `:` type($vector) `,`
+ type($memref)
+ }];
+}
+
+def Vector_ShapeCastOp :
+ Vector_Op<"shape_cast", [NoSideEffect]>,
+ Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>,
+ Results<(outs AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$result)> {
+ let summary = "shape_cast casts between vector shapes";
+ let description = [{
+ The shape_cast operation casts between an n-D source vector shape and
+ a k-D result vector shape (the element type remains the same).
+
+ If reducing rank (n > k), result dimension sizes must be a product
+ of contiguous source dimension sizes.
+ If expanding rank (n < k), source dimensions must factor into a
+ contiguous sequence of destination dimension sizes.
+    Each source dimension is expanded (or each contiguous sequence of source
+    dimensions is combined) in source dimension list order (i.e. 0 <= i < n),
+    to produce a contiguous sequence of result dimensions (or a single result
+    dimension), in result dimension list order (i.e. 0 <= j < k). The product
+    of all source dimension sizes must match the product of all result
+    dimension sizes.
+
+ If the source/result types are a tuple of vectors, the casting operation
+ described above is applied to each source/result tuple element pair.
+
+ It is currently assumed that this operation does not require moving data,
+ and that it will be folded away before lowering vector operations.
+
+    There is an exception to the folding expectation when targeting
+    llvm.intr.matrix operations, which need a type conversion back and forth
+    between a 2-D MLIR vector and a 1-D flattened LLVM vector. In that
+    particular case, shape_cast lowering to LLVM is supported for now.
+
+ Examples:
+
+ ```mlir
+ // Example casting to a lower vector rank.
+ %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>
+
+ // Example casting to a higher vector rank.
+ %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>
+
+    // Example casting a tuple of vectors of same rank, where tuple elements
+    // may have different shapes.
+ %5 = vector.shape_cast %4 : tuple<vector<3x4x2xf32>, vector<3x3x2xf32>> to
+ tuple<vector<12x2xf32>, vector<9x2xf32>>
+ ```
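+
+    As a sketch of the contiguity rule above, the following rank-reducing
+    cast would be invalid even though both shapes contain 24 elements,
+    because 8 is not a product of contiguous source dimension sizes:
+
+    ```mlir
+    // Invalid: 2x3x4 cannot be regrouped into 8x3.
+    %bad = vector.shape_cast %v : vector<2x3x4xf32> to vector<8x3xf32>
+    ```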
+ }];
+ let extraClassDeclaration = [{
+ VectorType getSourceVectorType() {
+ return source().getType().cast<VectorType>();
+ }
+ VectorType getResultVectorType() {
+ return getResult().getType().cast<VectorType>();
+ }
+ }];
+ let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
+}
+
+def Vector_TypeCastOp :
+ Vector_Op<"type_cast", [NoSideEffect]>,
+ Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
+ Results<(outs AnyMemRef)> {
+ let summary = "type_cast op converts a scalar memref to a vector memref";
+ let description = [{
+    Performs a conversion from a memref with scalar element to a memref with
+    a *single* vector element, copying the shape of the memref to the vector.
+    This is the minimal viable operation that is required to make
+    super-vectorization operational. It can be seen as a special case of the
+    `view` operation but scoped in the super-vectorization context.
+
+ Syntax:
+
+ ```
+ operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
+ ```
+
+ Example:
+
+ ```mlir
+ %A = alloc() : memref<5x4x3xf32>
+ %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
+ ```
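+
+    A typical follow-up (a sketch, assuming a standard-dialect `load` from a
+    0-D memref) reads the single vector element back:
+
+    ```mlir
+    %v = load %VA[] : memref<vector<5x4x3xf32>>
+    ```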
+ }];
+
+ let builders = [OpBuilder<
+ "Builder *builder, OperationState &result, Value source">];
+
+ let parser = [{
+ return impl::parseCastOp(parser, result);
+ }];
+
+ let extraClassDeclaration = [{
+ MemRefType getMemRefType() {
+ return memref().getType().cast<MemRefType>();
+ }
+ MemRefType getResultMemRefType() {
+ return getResult().getType().cast<MemRefType>();
+ }
+ }];
+}
+
+def Vector_ConstantMaskOp :
+ Vector_Op<"constant_mask", [NoSideEffect]>,
+ Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>,
+ Results<(outs VectorOf<[I1]>)> {
+ let summary = "creates a constant vector mask";
+ let description = [{
+ Creates and returns a vector mask where elements of the result vector
+ are set to '0' or '1', based on whether the element indices are contained
+ within a hyper-rectangular region specified by the 'mask_dim_sizes'
+    within a hyper-rectangular region specified by the 'mask_dim_sizes'
+    array attribute argument. Each element of the 'mask_dim_sizes' array
+    specifies an exclusive upper bound [0, mask-dim-size-element-value)
+    for a unique dimension in the vector result. The conjunction of the
+    ranges defines a hyper-rectangular region within which element values
+    are set to 1 (otherwise element values are set to 0).
+
+    Example: create a constant vector mask of size 4x3xi1 where elements in
+    the ranges 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
+
+    ```
+    %1 = vector.constant_mask [3, 2] : vector<4x3xi1>
+
+    print %1
+                  columns
+                0    1    2
+              |------------
+          0   | 1    1    0
+    rows  1   | 1    1    0
+          2   | 1    1    0
+          3   | 0    0    0
+    ```
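+
+    A minimal 1-D sketch:
+
+    ```
+    // Yields [1, 1, 0, 0].
+    %2 = vector.constant_mask [2] : vector<4xi1>
+    ```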
+ }];
+
+ let extraClassDeclaration = [{
+ static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; }
+ }];
+ let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
+}
+
+def Vector_CreateMaskOp :
+ Vector_Op<"create_mask", [NoSideEffect]>,
+ Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> {
+ let summary = "creates a vector mask";
+ let description = [{
+ Creates and returns a vector mask where elements of the result vector
+ are set to '0' or '1', based on whether the element indices are contained
+ within a hyper-rectangular region specified by the operands. Specifically,
+ each operand specifies a range [0, operand-value) for a unique dimension in
+    the vector result. The conjunction of the operand ranges defines a
+    hyper-rectangular region within which element values are set to 1
+    (otherwise element values are set to 0).
+
+    Example: create a vector mask of size 4x3xi1 where elements in the ranges
+    0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
+
+    ```
+    %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
+
+    print %1
+                  columns
+                0    1    2
+              |------------
+          0   | 1    1    0
+    rows  1   | 1    1    0
+          2   | 1    1    0
+          3   | 0    0    0
+    ```
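+
+    Since this op declares a canonicalizer, the all-constant-operand form
+    above (a sketch, assuming %c3 and %c2 hold the index constants 3 and 2)
+    can fold to the equivalent constant mask:
+
+    ```
+    %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
+    // Canonicalizes to: vector.constant_mask [3, 2] : vector<4x3xi1>
+    ```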
+ }];
+
+ let hasCanonicalizer = 1;
+ let assemblyFormat = "$operands attr-dict `:` type(results)";
+}
+
+def Vector_TupleOp :
+ Vector_Op<"tuple", [NoSideEffect]>,
+ Arguments<(ins Variadic<AnyVector>:$vectors)>,
+ Results<(outs TupleOf<[AnyVector]>)> {
+ let summary = "make tuple of vectors operation";
+ let description = [{
+ Returns a tuple of its operands 'vectors'.
+
+ Note that this operation is used during the vector op unrolling
+ transformation and should be removed before lowering to lower-level
+ dialects.
+
+ Examples:
+ ```
+ %0 = vector.transfer_read ... : vector<2x2xf32>
+ %1 = vector.transfer_read ... : vector<2x1xf32>
+ %2 = vector.transfer_read ... : vector<2x2xf32>
+ %3 = vector.transfer_read ... : vector<2x1xf32>
+
+ %4 = vector.tuple %0, %1, %2, %3
+ : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
+    ```
+ }];
+
+ let extraClassDeclaration = [{
+ TupleType getResultTupleType() {
+ return getResult().getType().cast<TupleType>();
+ }
+ }];
+}
+
+def Vector_TupleGetOp :
+ Vector_Op<"tuple_get", [NoSideEffect]>,
+ Arguments<(ins TupleOf<[AnyVector]>:$vectors, APIntAttr:$index)>,
+ Results<(outs AnyVector)> {
+ let summary = "vector tuple get operation";
+ let description = [{
+ Returns the tuple element of 'vectors' at 'index'.
+
+ Note that this operation is used during the vector op unrolling
+ transformation and should be removed before lowering to lower-level
+ dialects.
+
+ Examples:
+ ```
+ %4 = vector.tuple %0, %1, %2, %3
+      : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
+
+ %5 = vector.tuple_get %4, 1
+ : tuple<vector<2x2xf32>, vector<2x1xf32>,
+ vector<2x2xf32>, vector<2x1xf32>>
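+    // %5 has type vector<2x1xf32> (the tuple element at index 1).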
+ ```
+ }];
+
+ let extraClassDeclaration = [{
+ VectorType getResultVectorType() {
+ return getResult().getType().cast<VectorType>();
+ }
+ int64_t getIndex() {
+ return getAttrOfType<IntegerAttr>("index").getValue().getSExtValue();
+ }
+ static StringRef getIndexAttrName() { return "index"; }
+ }];
+ let hasFolder = 1;
+}
+
+def Vector_PrintOp :
+ Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> {
+ let summary = "print operation (for testing and debugging)";
+ let description = [{
+ Prints the source vector (or scalar) to stdout in human readable
+ format (for testing and debugging). No return value.
+
+    Examples:
+    ```
+    %0 = constant 0.0 : f32
+    %1 = vector.broadcast %0 : f32 to vector<4xf32>
+    vector.print %1 : vector<4xf32>
+    ```
+
+    When lowered to LLVM, the vector print is unrolled into elementary
+    printing method calls that at runtime will yield
+
+    ```
+    ( 0.0, 0.0, 0.0, 0.0 )
+    ```
+
+    on stdout when linked with a small runtime support library, which only
+    needs to provide a few printing methods (a single value for all data
+    types, opening/closing bracket, comma, newline).
+ }];
+ let verifier = ?;
+ let extraClassDeclaration = [{
+ Type getPrintType() {
+ return source().getType();
+ }
+ }];
+ let assemblyFormat = "$source attr-dict `:` type($source)";
+}
+
+//===----------------------------------------------------------------------===//
+// Ops used for supporting progressive lowering and conversion type changes.
+//===----------------------------------------------------------------------===//
+
+/// Vector dialect matrix multiplication op that operates on flattened 1-D
+/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
+/// This may seem redundant with vector.contract but it serves the purposes of
+/// more progressive lowering and localized type conversion on the path:
+/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
+def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
+ PredOpTrait<"lhs operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 0>>,
+ PredOpTrait<"rhs operand and result have same element type",
+ TCresVTEtIsSameAsOpBase<0, 1>>]>,
+ Arguments<(
+ // TODO(ntv, fhahn): tighten vector element types that make sense.
+ ins VectorOfRankAndType<[1],
+ [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$lhs,
+ VectorOfRankAndType<[1],
+ [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$rhs,
+ I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
+ Results<(
+ outs VectorOfRankAndType<[1],
+ [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)>
+{
+ let summary = "Vector matrix multiplication op that operates on flattened 1-D"
+ " MLIR vectors";
+ let description = [{
+ This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
+ purposes of more progressive lowering and localized type conversion.
+
+    The `vector.matrix_multiply` op treats `lhs` as a matrix with <lhs_rows>
+    rows and <lhs_columns> columns, `rhs` as a matrix with <lhs_columns> rows
+    and <rhs_columns> columns, and multiplies them. The result matrix is
+    returned embedded in the result vector.
+
+ Example:
+
+ ```
+ %C = vector.matrix_multiply %A, %B
+        { lhs_rows = 4: i32, lhs_columns = 16: i32, rhs_columns = 3: i32 } :
+ (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
+ ```
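+
+    Here the flattened operands are interpreted as a 4x16 and a 16x3 matrix,
+    and the flattened 4x3 result is returned as `vector<12xf64>`.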
+ }];
+ let builders = [
+ OpBuilder<"Builder *builder, OperationState &result, Value lhs, Value rhs, "
+ "unsigned lhsRows, unsigned lhsColumns, unsigned rhsColumns",
+ [{
+ result.addOperands({lhs, rhs});
+ result.addAttribute("lhs_rows", builder->getI32IntegerAttr(lhsRows));
+ result.addAttribute("lhs_columns", builder->getI32IntegerAttr(lhsColumns));
+ result.addAttribute("rhs_columns", builder->getI32IntegerAttr(rhsColumns));
+    result.addTypes(VectorType::get(lhsRows * rhsColumns,
+ lhs.getType().cast<VectorType>().getElementType()));
+ }]>,
+ ];
+ let verifier = ?;
+ let assemblyFormat = "$lhs `,` $rhs attr-dict "
+ "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
+}
+
+#endif // VECTOR_OPS
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 9a14a1586c7f..aba4556e4bc1 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -15,7 +15,7 @@
#define MLIR_INITALLDIALECTS_H_
#include "mlir/Dialect/AVX512/AVX512Dialect.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/FxpMathOps/FxpMathOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h"
@@ -39,7 +39,7 @@ namespace mlir {
// all the possible dialects to be made available to the context automatically.
inline void registerAllDialects() {
static bool init_once = []() {
- registerDialect<AffineOpsDialect>();
+ registerDialect<AffineDialect>();
registerDialect<avx512::AVX512Dialect>();
registerDialect<fxpmath::FxpMathOpsDialect>();
registerDialect<gpu::GPUDialect>();
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index c1cac45816df..8a23292fc964 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -25,6 +25,8 @@
#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
#include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h"
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/FxpMathOps/Passes.h"
#include "mlir/Dialect/FxpMathOps/Passes.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/Transforms/LegalizeForExport.h"
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 8b7495ec0e58..8515bfd173ea 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -53,11 +53,6 @@ std::unique_ptr<OpPassBase<FuncOp>> createLoopUnrollPass(
std::unique_ptr<OpPassBase<FuncOp>>
createLoopUnrollAndJamPass(int unrollJamFactor = -1);
-/// Creates a simplification pass for affine structures (maps and sets). In
-/// addition, this pass also normalizes memrefs to have the trivial (identity)
-/// layout map.
-std::unique_ptr<OpPassBase<FuncOp>> createSimplifyAffineStructuresPass();
-
/// Creates a loop fusion pass which fuses loops. Buffers of size less than or
/// equal to `localBufSizeThreshold` are promoted to memory space
/// `fastMemorySpace'.
@@ -91,14 +86,6 @@ createLoopTilingPass(uint64_t cacheSizeBytes);
/// bounds into a single loop.
std::unique_ptr<OpPassBase<FuncOp>> createLoopCoalescingPass();
-/// Performs packing (or explicit copying) of accessed memref regions into
-/// buffers in the specified faster memory space through either pointwise copies
-/// or DMA operations.
-std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(
- unsigned slowMemorySpace, unsigned fastMemorySpace,
- unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
- uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());
-
/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
std::unique_ptr<OpPassBase<FuncOp>> createMemRefDataFlowOptPass();
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index 6f1a21d0e5d0..56a0d72bb387 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -14,8 +14,8 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 0b75767d6210..3448443c3016 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineStructures.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/IntegerSet.h"
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index f9c0236e3d4e..262bc7e8a588 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -24,7 +24,7 @@ add_mlir_library(MLIRAnalysis
target_link_libraries(MLIRAnalysis
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRCallInterfaces
MLIRControlFlowInterfaces
MLIRInferTypeOpInterface
@@ -44,7 +44,7 @@ add_mlir_library(MLIRLoopAnalysis
target_link_libraries(MLIRLoopAnalysis
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRCallInterfaces
MLIRControlFlowInterfaces
MLIRInferTypeOpInterface
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index b5ff91b44062..9cf8fbc815d2 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -15,8 +15,8 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/NestedMatcher.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"
#include "llvm/ADT/DenseSet.h"
diff --git a/mlir/lib/Analysis/NestedMatcher.cpp b/mlir/lib/Analysis/NestedMatcher.cpp
index 2324bbcf7b56..807e5df46d1c 100644
--- a/mlir/lib/Analysis/NestedMatcher.cpp
+++ b/mlir/lib/Analysis/NestedMatcher.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/NestedMatcher.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "llvm/ADT/ArrayRef.h"
diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp
index fae41b4a027b..b1e45d1cfe7b 100644
--- a/mlir/lib/Analysis/SliceAnalysis.cpp
+++ b/mlir/lib/Analysis/SliceAnalysis.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Operation.h"
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 7b3cd58aa980..940e07630e20 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -14,8 +14,8 @@
#include "mlir/Analysis/Utils.h"
#include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
index 9c100a280a64..bb434abdc26c 100644
--- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
+++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
@@ -13,7 +13,7 @@
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
index aa916739697e..5613b28e3418 100644
--- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
+++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
@@ -7,7 +7,7 @@ add_mlir_conversion_library(MLIRAffineToStandard
target_link_libraries(
MLIRAffineToStandard
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRLoopOps
MLIRPass
MLIRStandardOps
diff --git a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt
index dd69af418bde..9a460bcf7165 100644
--- a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt
+++ b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt
@@ -7,7 +7,7 @@ add_mlir_conversion_library(MLIRLoopsToGPU
)
target_link_libraries(MLIRLoopsToGPU
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRAffineToStandard
MLIRGPU
MLIRIR
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 8023226bc300..ec5c9cff2ee0 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -15,7 +15,7 @@
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
index 9a703199cba1..264e704f914f 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
@@ -8,7 +8,7 @@
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -123,7 +123,7 @@ struct ParallelLoopToGpuPass : public OperationPass<ParallelLoopToGpuPass> {
populateParallelLoopToGPUPatterns(patterns, &getContext());
ConversionTarget target(getContext());
target.addLegalDialect<StandardOpsDialect>();
- target.addLegalDialect<AffineOpsDialect>();
+ target.addLegalDialect<AffineDialect>();
target.addLegalDialect<gpu::GPUDialect>();
target.addLegalDialect<loop::LoopOpsDialect>();
target.addIllegalOp<loop::ParallelOp>();
diff --git a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
index b16f02ef6b9c..b73d94562edc 100644
--- a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
+++ b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
@@ -13,7 +13,7 @@
#include <type_traits>
#include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h"
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/LoopOps/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
diff --git a/mlir/lib/Dialect/Affine/CMakeLists.txt b/mlir/lib/Dialect/Affine/CMakeLists.txt
new file mode 100644
index 000000000000..c018b50f967f
--- /dev/null
+++ b/mlir/lib/Dialect/Affine/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_mlir_dialect_library(MLIRAffine
+ IR/AffineOps.cpp
+ IR/AffineValueMap.cpp
+ EDSC/Builders.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
+
+ DEPENDS
+ MLIRAffineOpsIncGen
+ )
+target_link_libraries(MLIRAffine
+ PUBLIC
+ MLIREDSC
+ MLIRIR
+ MLIRLoopLikeInterface
+ MLIRSideEffects
+ MLIRStandardOps
+ )
+
+add_subdirectory(Transforms)
diff --git a/mlir/lib/Dialect/AffineOps/EDSC/Builders.cpp b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp
similarity index 99%
rename from mlir/lib/Dialect/AffineOps/EDSC/Builders.cpp
rename to mlir/lib/Dialect/Affine/EDSC/Builders.cpp
index e69f3d6c8c7a..06f88dcec1be 100644
--- a/mlir/lib/Dialect/AffineOps/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/EDSC/Builders.h"
+#include "mlir/Dialect/Affine/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Builders.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
diff --git a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt
new file mode 100644
index 000000000000..751bfd351bc6
--- /dev/null
+++ b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_mlir_dialect_library(MLIRAffine
+  Builders.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
+
+ DEPENDS
+ MLIRAffineOpsIncGen
+ )
+target_link_libraries(MLIRAffine
+ PUBLIC
+ MLIREDSC
+ MLIRIR
+ MLIRLoopLikeInterface
+ MLIRSideEffects
+ MLIRStandardOps
+ )
diff --git a/mlir/lib/Dialect/AffineOps/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
similarity index 99%
rename from mlir/lib/Dialect/AffineOps/AffineOps.cpp
rename to mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 0b8795947e06..9e1f7e96bb0b 100644
--- a/mlir/lib/Dialect/AffineOps/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/IntegerSet.h"
@@ -25,7 +25,7 @@ using llvm::dbgs;
#define DEBUG_TYPE "affine-analysis"
//===----------------------------------------------------------------------===//
-// AffineOpsDialect Interfaces
+// AffineDialect Interfaces
//===----------------------------------------------------------------------===//
namespace {
@@ -64,21 +64,21 @@ struct AffineInlinerInterface : public DialectInlinerInterface {
} // end anonymous namespace
//===----------------------------------------------------------------------===//
-// AffineOpsDialect
+// AffineDialect
//===----------------------------------------------------------------------===//
-AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
+AffineDialect::AffineDialect(MLIRContext *context)
: Dialect(getDialectNamespace(), context) {
addOperations<AffineDmaStartOp, AffineDmaWaitOp, AffineLoadOp, AffineStoreOp,
#define GET_OP_LIST
-#include "mlir/Dialect/AffineOps/AffineOps.cpp.inc"
+#include "mlir/Dialect/Affine/IR/AffineOps.cpp.inc"
>();
addInterfaces<AffineInlinerInterface>();
}
/// Materialize a single constant operation from a given attribute value with
/// the desired resultant type.
-Operation *AffineOpsDialect::materializeConstant(OpBuilder &builder,
+Operation *AffineDialect::materializeConstant(OpBuilder &builder,
Attribute value, Type type,
Location loc) {
return builder.create<ConstantOp>(loc, type, value);
@@ -2369,4 +2369,4 @@ static ParseResult parseAffineParallelOp(OpAsmParser &parser,
//===----------------------------------------------------------------------===//
#define GET_OP_CLASSES
-#include "mlir/Dialect/AffineOps/AffineOps.cpp.inc"
+#include "mlir/Dialect/Affine/IR/AffineOps.cpp.inc"
diff --git a/mlir/lib/Dialect/AffineOps/AffineValueMap.cpp b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
similarity index 97%
rename from mlir/lib/Dialect/AffineOps/AffineValueMap.cpp
rename to mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
index bac183505a71..c17f59323a7f 100644
--- a/mlir/lib/Dialect/AffineOps/AffineValueMap.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/AffineValueMap.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
using namespace mlir;
diff --git a/mlir/lib/Dialect/AffineOps/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
similarity index 57%
rename from mlir/lib/Dialect/AffineOps/CMakeLists.txt
rename to mlir/lib/Dialect/Affine/IR/CMakeLists.txt
index bf490a5c9795..91dcceaf3912 100644
--- a/mlir/lib/Dialect/AffineOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
@@ -1,15 +1,14 @@
-add_mlir_dialect_library(MLIRAffineOps
+add_mlir_dialect_library(MLIRAffine
AffineOps.cpp
AffineValueMap.cpp
- EDSC/Builders.cpp
ADDITIONAL_HEADER_DIRS
- ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AffineOps
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
DEPENDS
MLIRAffineOpsIncGen
)
-target_link_libraries(MLIRAffineOps
+target_link_libraries(MLIRAffine
PUBLIC
MLIREDSC
MLIRIR
diff --git a/mlir/lib/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
similarity index 99%
rename from mlir/lib/Transforms/AffineDataCopyGeneration.cpp
rename to mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index 5409c557da83..4f6d453fb56b 100644
--- a/mlir/lib/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -20,11 +20,11 @@
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/CommandLine.h"
diff --git a/mlir/lib/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
similarity index 99%
rename from mlir/lib/Transforms/AffineLoopInvariantCodeMotion.cpp
rename to mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index a8ea4b9c1e10..1616b87c4cdc 100644
--- a/mlir/lib/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -15,7 +15,7 @@
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
new file mode 100644
index 000000000000..d7e42b5489fa
--- /dev/null
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_mlir_dialect_library(MLIRAffineTransforms
+ AffineDataCopyGeneration.cpp
+ AffineLoopInvariantCodeMotion.cpp
+ SimplifyAffineStructures.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
+
+ DEPENDS
+ MLIRAffineOpsIncGen
+ MLIRLoopLikeInterfaceIncGen
+ )
+target_link_libraries(MLIRAffineTransforms
+ PUBLIC
+ MLIRAffine
+ MLIREDSC
+ MLIRIR
+ MLIRSideEffects
+ MLIRStandardOps
+ )
+
diff --git a/mlir/lib/Transforms/SimplifyAffineStructures.cpp b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
similarity index 95%
rename from mlir/lib/Transforms/SimplifyAffineStructures.cpp
rename to mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
index 671b3eab18df..60ad1545d350 100644
--- a/mlir/lib/Transforms/SimplifyAffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
@@ -13,7 +13,10 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Transforms/Utils.h"
#define DEBUG_TYPE "simplify-affine-structure"
diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt
index 0bcc794894cc..ddc147fe2657 100644
--- a/mlir/lib/Dialect/CMakeLists.txt
+++ b/mlir/lib/Dialect/CMakeLists.txt
@@ -1,5 +1,5 @@
+add_subdirectory(Affine)
add_subdirectory(AVX512)
-add_subdirectory(AffineOps)
add_subdirectory(FxpMathOps)
add_subdirectory(GPU)
add_subdirectory(Linalg)
diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
index e2c64f050158..198c7fc698dd 100644
--- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/IR/Builders.h"
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/LoopOps/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
diff --git a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt
index 85ecf2802b25..bc9e244d4ad5 100644
--- a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt
@@ -12,7 +12,7 @@ target_link_libraries(MLIRLinalgEDSC
PUBLIC
MLIREDSC
MLIRIR
- MLIRAffineOps
+ MLIRAffine
MLIRLinalgOps
MLIRLoopOps
MLIRStandardOps
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index 82e36f8a00b1..349c2d6980ac 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -14,7 +14,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
)
target_link_libraries(MLIRLinalgTransforms
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRAnalysis
MLIREDSC
MLIRIR
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
index 316a5a75617a..5e1748cc47c0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
index 54a4290e6e36..bee3f0dff0d2 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index cabdd7497caf..7b4a5c651d24 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
index 681a47d31271..f9ad613f2a17 100644
--- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
@@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRLinalgUtils
target_link_libraries(MLIRLinalgUtils
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIREDSC
MLIRIR
MLIRLinalgOps
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 9cc6aa48c966..c572be4d132e 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
diff --git a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt
index 1b6e6d232711..2ec44b472298 100644
--- a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt
@@ -8,7 +8,7 @@ add_mlir_dialect_library(MLIRLoopOpsTransforms
)
target_link_libraries(MLIRLoopOpsTransforms
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRIR
MLIRPass
MLIRLoopOps
diff --git a/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopSpecialization.cpp b/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopSpecialization.cpp
index 8cb49f3428d6..c692c0174f0c 100644
--- a/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopSpecialization.cpp
+++ b/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopSpecialization.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/LoopOps/Passes.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
diff --git a/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopTiling.cpp
index 6bced3761afb..85fd241cee7e 100644
--- a/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopTiling.cpp
+++ b/mlir/lib/Dialect/LoopOps/Transforms/ParallelLoopTiling.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/LoopOps/Passes.h"
#include "mlir/Dialect/LoopOps/Transforms.h"
diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt
index 3e1d8de0d3ba..e5e1251768cd 100644
--- a/mlir/lib/Dialect/Vector/CMakeLists.txt
+++ b/mlir/lib/Dialect/Vector/CMakeLists.txt
@@ -16,7 +16,7 @@ target_link_libraries(MLIRVector
MLIREDSC
MLIRIR
MLIRStandardOps
- MLIRAffineOps
+ MLIRAffine
MLIRLoopOps
MLIRLoopAnalysis
MLIRSideEffects
diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
index dd47e0c80dc1..6e54e5b05fb6 100644
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -12,7 +12,7 @@
#include <type_traits>
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
diff --git a/mlir/lib/Dialect/Vector/VectorUtils.cpp b/mlir/lib/Dialect/Vector/VectorUtils.cpp
index 1cace25b9835..f929dddd6d8d 100644
--- a/mlir/lib/Dialect/Vector/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/VectorUtils.cpp
@@ -12,7 +12,7 @@
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/Analysis/LoopAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp b/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
new file mode 100644
index 000000000000..e853c76d0dba
--- /dev/null
+++ b/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
@@ -0,0 +1,1349 @@
+//===- VectorTransforms.cpp - Conversion within the Vector dialect --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites as 1->N patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#include <type_traits>
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/VectorOps/VectorOps.h"
+#include "mlir/Dialect/VectorOps/VectorTransforms.h"
+#include "mlir/Dialect/VectorOps/VectorUtils.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/Module.h"
+#include "mlir/IR/OperationSupport.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Types.h"
+#include "mlir/Support/Functional.h"
+#include "mlir/Support/STLExtras.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "vector-to-vector"
+
+using namespace mlir;
+using llvm::dbgs;
+using mlir::functional::zipMap;
+
+static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
+
+static llvm::cl::opt<bool> lowerToLLVMMatrixIntrinsics(
+ "vector-lower-matrix-intrinsics",
+ llvm::cl::desc("Lower vector.contract to llvm.intr.matrix.multiply"),
+ llvm::cl::init(false), llvm::cl::cat(clOptionsCategory));
+
+/// Given a shape with sizes greater than 0 along all dimensions,
+/// returns the distance, in number of elements, between a slice in a dimension
+/// and the next slice in the same dimension.
+/// e.g. shape[3, 4, 5] -> linearization_basis[20, 5, 1]
+static SmallVector<int64_t, 8> computeStrides(ArrayRef<int64_t> shape) {
+ if (shape.empty())
+ return {};
+ SmallVector<int64_t, 8> tmp;
+ tmp.reserve(shape.size());
+ int64_t running = 1;
+ for (auto size : llvm::reverse(shape)) {
+    assert(size > 0 && "size must be positive");
+ tmp.push_back(running);
+ running *= size;
+ }
+ return SmallVector<int64_t, 8>(tmp.rbegin(), tmp.rend());
+}
+
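+// Returns the product of the entries of 'basis'; callers use it as the total
+// number of unrolled instances/slices described by 'basis'.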
+static int64_t computeMaxLinearIndex(ArrayRef<int64_t> basis) {
+ if (basis.empty())
+ return 0;
+ int64_t res = 1;
+ for (auto b : basis)
+ res *= b;
+ return res;
+}
+
+/// Computes and returns the linearized index of 'offsets' w.r.t. 'basis'.
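+/// E.g. (illustrative) offsets [1, 2] with basis [5, 1] linearize to 7.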
+static int64_t linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) {
+ assert(offsets.size() == basis.size());
+ int64_t linearIndex = 0;
+ for (unsigned idx = 0, e = basis.size(); idx < e; ++idx)
+ linearIndex += offsets[idx] * basis[idx];
+ return linearIndex;
+}
+
+// Clones `op` into a new operation that takes `operands` and returns
+// `resultTypes`.
+static Operation *cloneOpWithOperandsAndTypes(PatternRewriter &builder,
+ Location loc, Operation *op,
+ ArrayRef<Value> operands,
+ ArrayRef<Type> resultTypes) {
+ OperationState res(loc, op->getName().getStringRef(), operands, resultTypes,
+ op->getAttrs());
+ return builder.createOperation(res);
+}
+
+// Populates 'resultElements[indexMap[i]]' with elements from 'inputElements[i]'
+// for each index 'i' in inputElements with a valid mapping in 'indexMap'.
+static void getMappedElements(const DenseMap<int64_t, int64_t> &indexMap,
+ ArrayRef<int64_t> inputElements,
+ SmallVectorImpl<int64_t> &resultElements) {
+ assert(indexMap.size() == resultElements.size());
+ assert(inputElements.size() >= resultElements.size());
+ for (unsigned i = 0, e = inputElements.size(); i < e; ++i) {
+ auto it = indexMap.find(i);
+ if (it != indexMap.end())
+ resultElements[it->second] = inputElements[i];
+ }
+}
+
+// Returns a tuple type with vector element types for each resulting slice
+// of 'vectorType' unrolled by 'sizes' and 'strides'.
+// TODO(andydavis) Move this to a utility function and share it with
+// Extract/InsertSlicesOp verification.
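+// E.g. (illustrative) a vector<4x4xf32> with sizes = [2, 2] and unit strides
+// produces a tuple of four vector<2x2xf32> slices.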
+static TupleType generateExtractSlicesOpResultType(VectorType vectorType,
+ ArrayRef<int64_t> sizes,
+ ArrayRef<int64_t> strides,
+ PatternRewriter &builder) {
+ assert(llvm::all_of(strides, [](int64_t s) { return s == 1; }));
+ assert(static_cast<int64_t>(sizes.size()) == vectorType.getRank());
+ assert(static_cast<int64_t>(strides.size()) == vectorType.getRank());
+
+ // Compute shape ratio of 'shape' and 'sizes'.
+ auto shape = vectorType.getShape();
+ auto maybeDimSliceCounts = shapeRatio(shape, sizes);
+ assert(maybeDimSliceCounts.hasValue());
+ auto sliceDimCounts = *maybeDimSliceCounts;
+
+ // Compute strides w.r.t number of slices in each dimension.
+ auto sliceStrides = computeStrides(sliceDimCounts);
+ int64_t sliceCount = computeMaxLinearIndex(sliceDimCounts);
+ SmallVector<Type, 4> vectorTypes(sliceCount);
+ for (unsigned i = 0; i < sliceCount; ++i) {
+ auto vectorOffsets = delinearize(sliceStrides, i);
+ auto elementOffsets =
+ computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
+ auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
+ // Create Vector type and add to 'vectorTypes[i]'.
+ vectorTypes[i] = VectorType::get(sliceSizes, vectorType.getElementType());
+ }
+ return TupleType::get(vectorTypes, builder.getContext());
+}
+
+// UnrolledVectorState aggregates per-operand/result vector state required for
+// unrolling.
+struct UnrolledVectorState {
+ SmallVector<int64_t, 4> unrolledShape;
+ SmallVector<int64_t, 4> unrollFactors;
+ SmallVector<int64_t, 8> basis;
+ int64_t numInstances;
+ Value slicesTuple;
+};
+
+// Populates 'state' with unrolled shape, unroll factors, basis and
+// num unrolled instances for 'vectorType'.
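+// E.g. (illustrative, with an identity index map) a vector<4x4xf32> unrolled
+// to targetShape [2, 2] yields unrolledShape = [2, 2], unrollFactors = [2, 2],
+// basis = [2, 1], numInstances = 4.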
+static void initUnrolledVectorState(VectorType vectorType, Value initValue,
+ const DenseMap<int64_t, int64_t> &indexMap,
+ ArrayRef<int64_t> targetShape,
+ UnrolledVectorState &state,
+ PatternRewriter &builder) {
+ // Compute unrolled shape of 'vectorType'.
+ state.unrolledShape.resize(vectorType.getRank());
+ getMappedElements(indexMap, targetShape, state.unrolledShape);
+ // Compute unroll factors for unrolled shape.
+ auto maybeUnrollFactors =
+ shapeRatio(vectorType.getShape(), state.unrolledShape);
+ assert(maybeUnrollFactors.hasValue());
+ state.unrollFactors = *maybeUnrollFactors;
+ // Compute 'basis' and 'numInstances' based on 'state.unrollFactors'.
+ state.basis = computeStrides(state.unrollFactors);
+ state.numInstances = computeMaxLinearIndex(state.unrollFactors);
+ state.slicesTuple = nullptr;
+ if (initValue != nullptr) {
+ // Create ExtractSlicesOp.
+ SmallVector<int64_t, 4> sizes(state.unrolledShape);
+ SmallVector<int64_t, 4> strides(state.unrollFactors.size(), 1);
+ auto tupleType =
+ generateExtractSlicesOpResultType(vectorType, sizes, strides, builder);
+ state.slicesTuple = builder.create<vector::ExtractSlicesOp>(
+ initValue.getLoc(), tupleType, initValue, sizes, strides);
+ }
+}
+
+// Computes and returns the linear index of the unrolled vector at
+// 'vectorOffsets' within the vector represented by 'state'.
+static int64_t
+getUnrolledVectorLinearIndex(UnrolledVectorState &state,
+ ArrayRef<int64_t> vectorOffsets,
+ DenseMap<int64_t, int64_t> &indexMap) {
+ // Compute vector offsets.
+ SmallVector<int64_t, 4> sliceOffsets(state.unrolledShape.size());
+ getMappedElements(indexMap, vectorOffsets, sliceOffsets);
+ // Compute and return linear index of 'sliceOffsets' w.r.t 'state.basis'.
+ return linearize(sliceOffsets, state.basis);
+}
+
+// Returns an unrolled vector at 'vectorOffsets' within the vector
+// represented by 'state'. The vector is created from a slice of 'initValue'
+// if not present in 'cache'.
+static Value getOrCreateUnrolledVectorSlice(
+ Location loc, UnrolledVectorState &state, ArrayRef<int64_t> vectorOffsets,
+ ArrayRef<int64_t> offsets, DenseMap<int64_t, int64_t> &indexMap,
+ Value initValue, SmallVectorImpl<Value> &cache, PatternRewriter &builder) {
+ // Compute slice offsets.
+ SmallVector<int64_t, 4> sliceOffsets(state.unrolledShape.size());
+ getMappedElements(indexMap, offsets, sliceOffsets);
+ // TODO(b/144845578) Support non-1 strides.
+ SmallVector<int64_t, 4> sliceStrides(state.unrolledShape.size(), 1);
+ // Compute linear index of 'sliceOffsets' w.r.t 'state.basis'.
+ int64_t sliceLinearIndex =
+ getUnrolledVectorLinearIndex(state, vectorOffsets, indexMap);
+ assert(sliceLinearIndex < static_cast<int64_t>(cache.size()));
+ auto valueSlice = cache[sliceLinearIndex];
+ if (valueSlice == nullptr) {
+ // Return tuple element at 'sliceLinearIndex'.
+ auto tupleIndex = builder.getI64IntegerAttr(sliceLinearIndex);
+ auto initValueType = initValue.getType().cast<VectorType>();
+ auto vectorType =
+ VectorType::get(state.unrolledShape, initValueType.getElementType());
+ // Initialize 'cache' with slice from 'initValue'.
+ valueSlice = builder.create<vector::TupleGetOp>(
+ loc, vectorType, state.slicesTuple, tupleIndex);
+ // Store value back to 'cache'.
+ cache[sliceLinearIndex] = valueSlice;
+ }
+ return valueSlice;
+}
+
+// VectorState aggregates per-operand/result vector state required for
+// creating slices of vector operands, and clones of the operation being
+// unrolled.
+struct VectorState {
+ // The type of this vector.
+ VectorType type;
+ // Map from iteration space index to vector dimension index.
+ DenseMap<int64_t, int64_t> indexMap;
+ // Index of this value in operation's operand list (-1 if not an operand).
+ int64_t operandIndex = -1;
+ // Accumulator iterator flag.
+ bool isAcc = false;
+};
+
+//
+// unrollSingleResultStructuredOp
+//
+// Returns a value representing the result of structured operation 'op'
+// with iteration bounds 'iterationBounds' unrolled to 'targetShape'.
+// A list of VectorState objects must be specified in 'vectors', where
+// each VectorState in the list represents a vector operand or vector result
+// (if the operation does not have an accumulator operand).
+// The VectorState at index 'resultIndex' in the list must be the state
+// associated with the operations single result (i.e. either its accumulator
+// operand or vector result value).
+//
+// Example:
+//
+// // Before unrolling
+//
+// operand0 operand1 operand2
+// \ | /
+// -------------------- opA --------------------
+//
+// // After unrolling by 2
+//
+// operand0 operand1 operand2
+// / \ / \ / \
+// slice00 slice01 slice10 slice11 slice20 slice21
+// \ | | | / |
+// -------------------- opA0 -------------------- |
+// | | | |
+// \ | | /
+// -------------------- opA1 -------------------
+// | |
+// \ /
+// insertslice
+// |
+
+// TODO(andydavis) Add the following canonicalization/simplification patterns:
+// *) Add pattern which matches InsertStridedSlice -> StridedSlice and forwards
+// InsertStridedSlice operand to StridedSlice.
+// *) Add pattern which matches SourceOp -> StridedSlice -> UserOp which checks
+// if there are duplicate identical StridedSlice ops from SourceOp, and
+// rewrites itself to use the first duplicate. This transformation should
+// cause users of identical StridedSlice ops to reuse the same StridedSlice
+// operation, and leave the duplicate StridedSlice ops with no users
+// (removable with DCE).
+
+// TODO(andydavis) Generalize this to support structured ops beyond
+// vector ContractionOp, and merge it with 'unrollSingleResultOpMatchingType'
+static Value unrollSingleResultStructuredOp(Operation *op,
+ ArrayRef<int64_t> iterationBounds,
+ std::vector<VectorState> &vectors,
+ unsigned resultIndex,
+ ArrayRef<int64_t> targetShape,
+ PatternRewriter &builder) {
+ auto shapedType = op->getResult(0).getType().dyn_cast_or_null<ShapedType>();
+ if (!shapedType || !shapedType.hasStaticShape())
+ assert(false && "Expected a statically shaped result type");
+
+ // Compute unroll factors for 'iterationBounds' based on 'targetShape'
+ auto maybeUnrollFactors = shapeRatio(iterationBounds, targetShape);
+ if (!maybeUnrollFactors.hasValue())
+ assert(false && "Failed to compute unroll factors for target shape");
+ auto unrollFactors = *maybeUnrollFactors;
+
+ // Compute unrolled vector state for each vector in 'vectors'.
+ unsigned numVectors = vectors.size();
+ SmallVector<UnrolledVectorState, 3> unrolledVectorState(numVectors);
+ for (unsigned i = 0; i < numVectors; ++i) {
+ int64_t operandIndex = vectors[i].operandIndex;
+ auto operand = operandIndex >= 0 ? op->getOperand(operandIndex) : nullptr;
+ initUnrolledVectorState(vectors[i].type, operand, vectors[i].indexMap,
+ targetShape, unrolledVectorState[i], builder);
+ }
+ // Compute number of total unrolled instances.
+ auto numUnrolledInstances = computeMaxLinearIndex(unrollFactors);
+ auto sliceStrides = computeStrides(unrollFactors);
+
+ auto &resultValueState = unrolledVectorState[resultIndex];
+ auto unrolledResultType = VectorType::get(resultValueState.unrolledShape,
+ shapedType.getElementType());
+
+ // Initialize caches for intermediate vector results.
+ std::vector<SmallVector<Value, 4>> caches(numVectors);
+ for (unsigned i = 0; i < numVectors; ++i)
+ caches[i].resize(unrolledVectorState[i].numInstances);
+
+ // Unroll 'numUnrolledInstances' of 'op', storing results in 'caches'.
+ for (unsigned i = 0; i < numUnrolledInstances; ++i) {
+ auto vectorOffsets = delinearize(sliceStrides, i);
+ auto elementOffsets =
+ computeElementOffsetsFromVectorSliceOffsets(targetShape, vectorOffsets);
+ // Get cached slice (or create slice) for each operand at 'offsets'.
+ SmallVector<Value, 3> operands;
+ operands.resize(op->getNumOperands());
+ for (unsigned i = 0; i < numVectors; ++i) {
+ int64_t operandIndex = vectors[i].operandIndex;
+ if (operandIndex < 0)
+ continue; // Result state; there is no operand to slice.
+ auto operand = op->getOperand(operandIndex);
+ operands[operandIndex] = getOrCreateUnrolledVectorSlice(
+ op->getLoc(), unrolledVectorState[i], vectorOffsets, elementOffsets,
+ vectors[i].indexMap, operand, caches[i], builder);
+ }
+ // Create op on sliced vector arguments.
+ auto resultVector =
+ cloneOpWithOperandsAndTypes(builder, op->getLoc(), op, operands,
+ unrolledResultType)
+ ->getResult(0);
+
+ // Compute linear result index.
+ int64_t linearIndex = getUnrolledVectorLinearIndex(
+ resultValueState, vectorOffsets, vectors[resultIndex].indexMap);
+ // Update result cache at 'linearIndex'.
+ caches[resultIndex][linearIndex] = resultVector;
+ }
+
+ // Create TupleOp of unrolled result vectors.
+ SmallVector<Type, 4> vectorTupleTypes(resultValueState.numInstances);
+ SmallVector<Value, 4> vectorTupleValues(resultValueState.numInstances);
+ for (unsigned i = 0; i < resultValueState.numInstances; ++i) {
+ vectorTupleTypes[i] = caches[resultIndex][i].getType().cast<VectorType>();
+ vectorTupleValues[i] = caches[resultIndex][i];
+ }
+ TupleType tupleType = builder.getTupleType(vectorTupleTypes);
+ Value tupleOp = builder.create<vector::TupleOp>(op->getLoc(), tupleType,
+ vectorTupleValues);
+
+ // Create InsertSlicesOp(Tuple(result_vectors)).
+ auto resultVectorType = op->getResult(0).getType().cast<VectorType>();
+ SmallVector<int64_t, 4> sizes(resultValueState.unrolledShape);
+ SmallVector<int64_t, 4> strides(resultValueState.unrollFactors.size(), 1);
+
+ Value insertSlicesOp = builder.create<vector::InsertSlicesOp>(
+ op->getLoc(), resultVectorType, tupleOp, builder.getI64ArrayAttr(sizes),
+ builder.getI64ArrayAttr(strides));
+ return insertSlicesOp;
+}
+
+static void getVectorContractionOpUnrollState(
+ vector::ContractionOp contractionOp, ArrayRef<int64_t> targetShape,
+ SmallVectorImpl<int64_t> &iterationBounds,
+ std::vector<VectorState> &vectors, unsigned &resultIndex) {
+ // Get contraction op iteration bounds.
+ contractionOp.getIterationBounds(iterationBounds);
+ assert(iterationBounds.size() == targetShape.size());
+ // Get map from iteration space index to lhs/rhs/result shape index.
+ std::vector<DenseMap<int64_t, int64_t>> iterationIndexMapList;
+ contractionOp.getIterationIndexMap(iterationIndexMapList);
+ unsigned numIterators = iterationIndexMapList.size();
+ vectors.resize(numIterators);
+ unsigned accOperandIndex = vector::ContractionOp::getAccOperandIndex();
+ for (unsigned i = 0; i < numIterators; ++i) {
+ vectors[i].type = contractionOp.getOperand(i).getType().cast<VectorType>();
+ vectors[i].indexMap = iterationIndexMapList[i];
+ vectors[i].operandIndex = i;
+ vectors[i].isAcc = (i == accOperandIndex);
+ }
+
+ if (llvm::size(contractionOp.masks()) == 2) {
+ // Add vectors for lhs/rhs vector mask arguments. Masks have the
+ // same vector shape as the lhs/rhs args, so copy their index maps.
+ vectors.push_back({contractionOp.getLHSVectorMaskType(),
+ vectors[0].indexMap, accOperandIndex + 1, false});
+ vectors.push_back({contractionOp.getRHSVectorMaskType(),
+ vectors[1].indexMap, accOperandIndex + 2, false});
+ }
+ // Unroll 'op' 'iterationBounds' to 'targetShape'.
+ // TODO(andydavis) Use linalg style 'args_in'/'args_out' to partition
+ // 'vectors' instead of 'resultIndex'.
+ resultIndex = accOperandIndex;
+}
+
+static void
+getVectorElementwiseOpUnrollState(Operation *op, ArrayRef<int64_t> targetShape,
+ SmallVectorImpl<int64_t> &iterationBounds,
+ std::vector<VectorState> &vectors,
+ unsigned &resultIndex) {
+ // Verify that operation and operands all have the same vector shape.
+ auto resultType = op->getResult(0).getType().dyn_cast_or_null<VectorType>();
+ assert(resultType && "Expected op with vector result type");
+ auto resultShape = resultType.getShape();
+ // Verify that all operands have the same vector type as result.
+ assert(llvm::all_of(op->getOperandTypes(),
+ [=](Type type) { return type == resultType; }));
+ // Populate 'iterationBounds' with 'resultShape' for elementwise operations.
+ iterationBounds.assign(resultShape.begin(), resultShape.end());
+
+ // Create trivial elementwise identity index map based on 'resultShape'.
+ DenseMap<int64_t, int64_t> indexMap;
+ indexMap.reserve(resultShape.size());
+ for (unsigned i = 0; i < resultShape.size(); ++i)
+ indexMap[i] = i;
+
+ // Create a VectorState for each operand and for the single result.
+ unsigned numVectors = op->getNumOperands() + op->getNumResults();
+ vectors.resize(numVectors);
+ for (unsigned i = 0; i < op->getNumOperands(); ++i)
+ vectors[i] = {resultType, indexMap, i, false};
+ vectors[numVectors - 1] = {resultType, indexMap, -1, false};
+ resultIndex = numVectors - 1;
+}
+
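+// For example (illustrative): for an addf on two vector<4x4xf32> operands,
+// this populates iterationBounds = [4, 4], three VectorStates (two operands
+// plus the result) sharing the identity index map {0->0, 1->1}, and
+// resultIndex = 2.
+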
+// Entry point for unrolling declarative pattern rewrites.
+SmallVector<Value, 1> mlir::vector::unrollSingleResultOpMatchingType(
+ PatternRewriter &builder, Operation *op, ArrayRef<int64_t> targetShape) {
+ assert(op->getNumResults() == 1 && "Expected single result operation");
+
+ // Populate 'iterationBounds', 'vectors' and 'resultIndex' to unroll 'op'.
+ SmallVector<int64_t, 6> iterationBounds;
+ std::vector<VectorState> vectors;
+ unsigned resultIndex;
+
+ if (auto contractionOp = dyn_cast<vector::ContractionOp>(op)) {
+ // Populate state for vector ContractionOp.
+ getVectorContractionOpUnrollState(contractionOp, targetShape,
+ iterationBounds, vectors, resultIndex);
+ } else {
+ // Populate state for vector elementwise op.
+ getVectorElementwiseOpUnrollState(op, targetShape, iterationBounds, vectors,
+ resultIndex);
+ }
+
+ // Unroll 'op' with 'iterationBounds' to 'targetShape'.
+ return SmallVector<Value, 1>{unrollSingleResultStructuredOp(
+ op, iterationBounds, vectors, resultIndex, targetShape, builder)};
+}
+
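+// As a minimal sketch (illustrative only; 'UnrollContractionPattern' and the
+// fixed 2x2x2 target shape are assumptions, not part of this change), a
+// rewrite pattern could drive this entry point as:
+//
+//   struct UnrollContractionPattern
+//       : public OpRewritePattern<vector::ContractionOp> {
+//     using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;
+//     PatternMatchResult matchAndRewrite(vector::ContractionOp op,
+//                                        PatternRewriter &rewriter) const override {
+//       auto newResults = vector::unrollSingleResultOpMatchingType(
+//           rewriter, op.getOperation(), /*targetShape=*/{2, 2, 2});
+//       rewriter.replaceOp(op, newResults);
+//       return matchSuccess();
+//     }
+//   };
+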
+/// Generates slices of 'vectorType' according to 'sizes' and 'strides', and
+/// calls 'fn' with the linear index and slice indices for each slice.
+static void
+generateTransferOpSlices(Type memrefElementType, VectorType vectorType,
+ TupleType tupleType, ArrayRef<int64_t> sizes,
+ ArrayRef<int64_t> strides, ArrayRef<Value> indices,
+ PatternRewriter &rewriter,
+ function_ref<void(unsigned, ArrayRef<Value>)> fn) {
+ // Compute strides w.r.t. slice counts in each dimension.
+ auto maybeDimSliceCounts = shapeRatio(vectorType.getShape(), sizes);
+ assert(maybeDimSliceCounts.hasValue());
+ auto sliceDimCounts = *maybeDimSliceCounts;
+ auto sliceStrides = computeStrides(sliceDimCounts);
+
+ int64_t numSlices = tupleType.size();
+ unsigned numSliceIndices = indices.size();
+ // Compute 'indexOffset' at which to update 'indices', which is equal
+ // to the memref rank (indices.size()) minus the effective 'vectorRank'.
+ // The effective 'vectorRank' is equal to the rank of the vector type
+ // minus the rank of the memref vector element type (if it has one).
+ //
+ // For example:
+ //
+ // Given memref type 'memref<6x2x1xvector<2x4xf32>>' and vector
+ // transfer_read/write ops which read/write vectors of type
+ // 'vector<2x1x2x4xf32>'. The memref rank is 3, and the effective
+ // vector rank is 4 - 2 = 2, and so 'indexOffset' = 3 - 2 = 1.
+ //
+ unsigned vectorRank = vectorType.getRank();
+ if (auto memrefVectorElementType = memrefElementType.dyn_cast<VectorType>()) {
+ assert(vectorRank >= memrefVectorElementType.getRank());
+ vectorRank -= memrefVectorElementType.getRank();
+ }
+ unsigned indexOffset = numSliceIndices - vectorRank;
+
+ auto *ctx = rewriter.getContext();
+ for (unsigned i = 0; i < numSlices; ++i) {
+ auto vectorOffsets = delinearize(sliceStrides, i);
+ auto elementOffsets =
+ computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
+ // Compute 'sliceIndices' by adding 'sliceOffsets[i]' to 'indices[i]'.
+ SmallVector<Value, 4> sliceIndices(numSliceIndices);
+ for (unsigned j = 0; j < numSliceIndices; ++j) {
+ if (j < indexOffset) {
+ sliceIndices[j] = indices[j];
+ } else {
+ auto expr = getAffineDimExpr(0, ctx) +
+ getAffineConstantExpr(elementOffsets[j - indexOffset], ctx);
+ auto map = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, expr);
+ sliceIndices[j] = rewriter.create<AffineApplyOp>(
+ indices[j].getLoc(), map, ArrayRef<Value>(indices[j]));
+ }
+ }
+ // Call 'fn' to generate slice 'i' at 'sliceIndices'.
+ fn(i, sliceIndices);
+ }
+}
+
+/// Returns true if 'map' is a suffix of an identity affine map, false
+/// otherwise. Example: affine_map<(d0, d1, d2, d3) -> (d2, d3)>
+static bool isIdentitySuffix(AffineMap map) {
+ if (map.getNumDims() < map.getNumResults())
+ return false;
+ ArrayRef<AffineExpr> results = map.getResults();
+ Optional<int> lastPos;
+ for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) {
+ auto expr = results[i].dyn_cast<AffineDimExpr>();
+ if (!expr)
+ return false;
+ int currPos = static_cast<int>(expr.getPosition());
+ if (lastPos.hasValue() && currPos != lastPos.getValue() + 1)
+ return false;
+ lastPos = currPos;
+ }
+ return true;
+}
+
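+// For example (illustrative): affine_map<(d0, d1, d2) -> (d1, d2)> is an
+// identity suffix, while affine_map<(d0, d1, d2) -> (d2, d1)> and
+// affine_map<(d0, d1) -> (d0 + d1)> are not.
+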
+namespace {
+
+// Splits vector TransferReadOp into smaller TransferReadOps based on the
+// slicing scheme of its unique ExtractSlicesOp user.
+struct SplitTransferReadOp : public OpRewritePattern<vector::TransferReadOp> {
+ using OpRewritePattern<vector::TransferReadOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::TransferReadOp xferReadOp,
+ PatternRewriter &rewriter) const override {
+ // TODO(andydavis, ntv) Support splitting TransferReadOp with non-identity
+ // permutation maps. Repurpose code from MaterializeVectors transformation.
+ if (!isIdentitySuffix(xferReadOp.permutation_map()))
+ return matchFailure();
+ // Return unless the unique 'xferReadOp' user is an ExtractSlicesOp.
+ // Check for a single use first to avoid dereferencing an empty user range.
+ Value xferReadResult = xferReadOp.getResult();
+ if (!xferReadResult.hasOneUse())
+ return matchFailure();
+ auto extractSlicesOp =
+ dyn_cast<vector::ExtractSlicesOp>(*xferReadResult.getUsers().begin());
+ if (!extractSlicesOp)
+ return matchFailure();
+
+ // Get 'sizes' and 'strides' parameters from ExtractSlicesOp user.
+ auto sourceVectorType = extractSlicesOp.getSourceVectorType();
+ auto resultTupleType = extractSlicesOp.getResultTupleType();
+ SmallVector<int64_t, 4> sizes;
+ extractSlicesOp.getSizes(sizes);
+ SmallVector<int64_t, 4> strides;
+ extractSlicesOp.getStrides(strides);
+ assert(llvm::all_of(strides, [](int64_t s) { return s == 1; }));
+
+ Location loc = xferReadOp.getLoc();
+ auto memrefElementType =
+ xferReadOp.memref().getType().cast<MemRefType>().getElementType();
+ int64_t numSlices = resultTupleType.size();
+ SmallVector<Value, 4> vectorTupleValues(numSlices);
+ SmallVector<Value, 4> indices(xferReadOp.indices().begin(),
+ xferReadOp.indices().end());
+ auto createSlice = [&](unsigned index, ArrayRef<Value> sliceIndices) {
+ // Get VectorType for slice 'i'.
+ auto sliceVectorType = resultTupleType.getType(index);
+ // Create split TransferReadOp for 'sliceUser'.
+ vectorTupleValues[index] = rewriter.create<vector::TransferReadOp>(
+ loc, sliceVectorType, xferReadOp.memref(), sliceIndices,
+ xferReadOp.permutation_map(), xferReadOp.padding());
+ };
+ generateTransferOpSlices(memrefElementType, sourceVectorType,
+ resultTupleType, sizes, strides, indices, rewriter,
+ createSlice);
+
+ // Create a tuple of the split transfer read operations.
+ Value tupleOp = rewriter.create<vector::TupleOp>(loc, resultTupleType,
+ vectorTupleValues);
+ // Replace 'xferReadOp' with a new InsertSlicesOp over the tuple.
+ rewriter.replaceOpWithNewOp<vector::InsertSlicesOp>(
+ xferReadOp, sourceVectorType, tupleOp, extractSlicesOp.sizes(),
+ extractSlicesOp.strides());
+ return matchSuccess();
+ }
+};
+
+// Splits vector TransferWriteOp into smaller TransferWriteOps, one for each
+// source vector slice.
+struct SplitTransferWriteOp : public OpRewritePattern<vector::TransferWriteOp> {
+ using OpRewritePattern<vector::TransferWriteOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::TransferWriteOp xferWriteOp,
+ PatternRewriter &rewriter) const override {
+ // TODO(andydavis, ntv) Support splitting TransferWriteOp with non-identity
+ // permutation maps. Repurpose code from MaterializeVectors transformation.
+ if (!isIdentitySuffix(xferWriteOp.permutation_map()))
+ return matchFailure();
+ // Return unless the 'xferWriteOp' 'vector' operand is an 'InsertSlicesOp'.
+ auto *vectorDefOp = xferWriteOp.vector().getDefiningOp();
+ auto insertSlicesOp = dyn_cast_or_null<vector::InsertSlicesOp>(vectorDefOp);
+ if (!insertSlicesOp)
+ return matchFailure();
+
+ // Get TupleOp operand of 'insertSlicesOp'.
+ auto tupleOp = dyn_cast_or_null<vector::TupleOp>(
+ insertSlicesOp.vectors().getDefiningOp());
+ if (!tupleOp)
+ return matchFailure();
+
+ // Get 'sizes' and 'strides' parameters from the defining InsertSlicesOp.
+ auto sourceTupleType = insertSlicesOp.getSourceTupleType();
+ auto resultVectorType = insertSlicesOp.getResultVectorType();
+ SmallVector<int64_t, 4> sizes;
+ insertSlicesOp.getSizes(sizes);
+ SmallVector<int64_t, 4> strides;
+ insertSlicesOp.getStrides(strides);
+
+ Location loc = xferWriteOp.getLoc();
+ auto memrefElementType =
+ xferWriteOp.memref().getType().cast<MemRefType>().getElementType();
+ SmallVector<Value, 4> indices(xferWriteOp.indices().begin(),
+ xferWriteOp.indices().end());
+ auto createSlice = [&](unsigned index, ArrayRef<Value> sliceIndices) {
+ // Create split TransferWriteOp for source vector 'tupleOp.operand[i]'.
+ rewriter.create<vector::TransferWriteOp>(
+ loc, tupleOp.getOperand(index), xferWriteOp.memref(), sliceIndices,
+ xferWriteOp.permutation_map());
+ };
+ generateTransferOpSlices(memrefElementType, resultVectorType,
+ sourceTupleType, sizes, strides, indices, rewriter,
+ createSlice);
+
+ // Erase old 'xferWriteOp'.
+ rewriter.eraseOp(xferWriteOp);
+ return matchSuccess();
+ }
+};
+
+/// Decomposes ShapeCastOp on tuple-of-vectors to multiple ShapeCastOps, each
+/// on vector types.
+struct ShapeCastOpDecomposer : public OpRewritePattern<vector::ShapeCastOp> {
+ using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ShapeCastOp shapeCastOp,
+ PatternRewriter &rewriter) const override {
+ // Check if 'shapeCastOp' has tuple source/result type.
+ auto sourceTupleType =
+ shapeCastOp.source().getType().dyn_cast_or_null<TupleType>();
+ auto resultTupleType =
+ shapeCastOp.result().getType().dyn_cast_or_null<TupleType>();
+ if (!sourceTupleType || !resultTupleType)
+ return matchFailure();
+ assert(sourceTupleType.size() == resultTupleType.size());
+
+ // Create single-vector ShapeCastOp for each source tuple element.
+ Location loc = shapeCastOp.getLoc();
+ SmallVector<Value, 8> resultElements;
+ resultElements.reserve(resultTupleType.size());
+ for (unsigned i = 0, e = sourceTupleType.size(); i < e; ++i) {
+ auto sourceElement = rewriter.create<vector::TupleGetOp>(
+ loc, sourceTupleType.getType(i), shapeCastOp.source(),
+ rewriter.getI64IntegerAttr(i));
+ resultElements.push_back(rewriter.create<vector::ShapeCastOp>(
+ loc, resultTupleType.getType(i), sourceElement));
+ }
+
+ // Replace 'shapeCastOp' with tuple of 'resultElements'.
+ rewriter.replaceOpWithNewOp<vector::TupleOp>(shapeCastOp, resultTupleType,
+ resultElements);
+ return matchSuccess();
+ }
+};
+
+/// ShapeCastOpFolder folds cancelling ShapeCastOps away.
+//
+// Example:
+//
+// The following MLIR with cancelling ShapeCastOps:
+//
+// %0 = source : vector<5x4x2xf32>
+// %1 = shape_cast %0 : vector<5x4x2xf32> to vector<20x2xf32>
+// %2 = shape_cast %1 : vector<20x2xf32> to vector<5x4x2xf32>
+// %3 = user %2 : vector<5x4x2xf32>
+//
+// Should canonicalize to the following:
+//
+// %0 = source : vector<5x4x2xf32>
+// %1 = user %0 : vector<5x4x2xf32>
+//
+struct ShapeCastOpFolder : public OpRewritePattern<vector::ShapeCastOp> {
+ using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ShapeCastOp shapeCastOp,
+ PatternRewriter &rewriter) const override {
+ // Check if 'shapeCastOp' has vector source/result type.
+ auto sourceVectorType =
+ shapeCastOp.source().getType().dyn_cast_or_null<VectorType>();
+ auto resultVectorType =
+ shapeCastOp.result().getType().dyn_cast_or_null<VectorType>();
+ if (!sourceVectorType || !resultVectorType)
+ return matchFailure();
+
+ // Check if shape cast op source operand is also a shape cast op.
+ auto sourceShapeCastOp = dyn_cast_or_null<vector::ShapeCastOp>(
+ shapeCastOp.source().getDefiningOp());
+ if (!sourceShapeCastOp)
+ return matchFailure();
+ auto operandSourceVectorType =
+ sourceShapeCastOp.source().getType().cast<VectorType>();
+ auto operandResultVectorType =
+ sourceShapeCastOp.result().getType().cast<VectorType>();
+
+ // Check if shape cast operations invert each other.
+ if (operandSourceVectorType != resultVectorType ||
+ operandResultVectorType != sourceVectorType)
+ return matchFailure();
+
+ rewriter.replaceOp(shapeCastOp, sourceShapeCastOp.source());
+ return matchSuccess();
+ }
+};
+
+// Pattern rewrite which forwards tuple elements to their users.
+// User(TupleGetOp(ExtractSlicesOp(InsertSlicesOp(TupleOp(Producer)))))
+// -> User(Producer)
+struct TupleGetFolderOp : public OpRewritePattern<vector::TupleGetOp> {
+ using OpRewritePattern<vector::TupleGetOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::TupleGetOp tupleGetOp,
+ PatternRewriter &rewriter) const override {
+ // Return if 'tupleGetOp.vectors' arg was not defined by ExtractSlicesOp.
+ auto extractSlicesOp = dyn_cast_or_null<vector::ExtractSlicesOp>(
+ tupleGetOp.vectors().getDefiningOp());
+ if (!extractSlicesOp)
+ return matchFailure();
+
+ // Return if 'extractSlicesOp.vector' arg was not defined by InsertSlicesOp.
+ auto insertSlicesOp = dyn_cast_or_null<vector::InsertSlicesOp>(
+ extractSlicesOp.vector().getDefiningOp());
+ if (!insertSlicesOp)
+ return matchFailure();
+
+ // Return if 'insertSlicesOp.vectors' arg was not defined by TupleOp.
+ auto tupleOp = dyn_cast_or_null<vector::TupleOp>(
+ insertSlicesOp.vectors().getDefiningOp());
+ if (!tupleOp)
+ return matchFailure();
+
+ // Forward Value from 'tupleOp' at 'tupleGetOp.index'.
+ Value tupleValue = tupleOp.getOperand(tupleGetOp.getIndex());
+ rewriter.replaceOp(tupleGetOp, tupleValue);
+ return matchSuccess();
+ }
+};
+
+/// Progressive lowering of ExtractSlicesOp to tuple of StridedSliceOp.
+/// One:
+/// %x = vector.extract_slices %0
+/// is replaced by:
+/// %a = vector.strided_slice %0
+/// %b = vector.strided_slice %0
+/// ..
+/// %x = vector.tuple %a, %b, ..
+class ExtractSlicesOpLowering
+ : public OpRewritePattern<vector::ExtractSlicesOp> {
+public:
+ using OpRewritePattern<vector::ExtractSlicesOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ExtractSlicesOp op,
+ PatternRewriter &rewriter) const override {
+ auto loc = op.getLoc();
+
+ VectorType vectorType = op.getSourceVectorType();
+ auto shape = vectorType.getShape();
+
+ SmallVector<int64_t, 4> sizes;
+ op.getSizes(sizes);
+ SmallVector<int64_t, 4> strides;
+ op.getStrides(strides); // all-ones at the moment
+
+ // For each element in the tuple, generate the proper strided slice.
+ TupleType tupleType = op.getResultTupleType();
+ int64_t tupleSize = tupleType.size();
+ SmallVector<Value, 4> tupleValues(tupleSize);
+ auto sliceStrides = computeStrides(shape, sizes);
+ for (int64_t i = 0; i < tupleSize; ++i) {
+ auto vectorOffsets = delinearize(sliceStrides, i);
+ auto elementOffsets =
+ computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
+ auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
+ // Insert in tuple.
+ tupleValues[i] = rewriter.create<vector::StridedSliceOp>(
+ loc, op.vector(), elementOffsets, sliceSizes, strides);
+ }
+
+ rewriter.replaceOpWithNewOp<vector::TupleOp>(op, tupleType, tupleValues);
+ return matchSuccess();
+ }
+};
+
+/// Progressive lowering of InsertSlicesOp to series of InsertStridedSliceOp.
+/// One:
+/// %x = vector.insert_slices %0
+/// is replaced by:
+/// %r0 = vector.splat 0
+/// %t1 = vector.tuple_get %0, 0
+/// %r1 = vector.insert_strided_slice %r0, %t1
+/// %t2 = vector.tuple_get %0, 1
+/// %r2 = vector.insert_strided_slice %r1, %t2
+/// ..
+/// %x = ..
+class InsertSlicesOpLowering : public OpRewritePattern<vector::InsertSlicesOp> {
+public:
+ using OpRewritePattern<vector::InsertSlicesOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::InsertSlicesOp op,
+ PatternRewriter &rewriter) const override {
+ auto loc = op.getLoc();
+
+ VectorType vectorType = op.getResultVectorType();
+ auto shape = vectorType.getShape();
+
+ SmallVector<int64_t, 4> sizes;
+ op.getSizes(sizes);
+ SmallVector<int64_t, 4> strides;
+ op.getStrides(strides); // all-ones at the moment
+
+ // Prepare result.
+ auto elemType = vectorType.getElementType();
+ Value zero = rewriter.create<ConstantOp>(loc, elemType,
+ rewriter.getZeroAttr(elemType));
+ Value result = rewriter.create<SplatOp>(loc, vectorType, zero);
+
+ // For each element in the tuple, extract the proper strided slice.
+ TupleType tupleType = op.getSourceTupleType();
+ int64_t tupleSize = tupleType.size();
+ auto sliceStrides = computeStrides(shape, sizes);
+ for (int64_t i = 0; i < tupleSize; ++i) {
+ auto vectorOffsets = delinearize(sliceStrides, i);
+ auto elementOffsets =
+ computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
+ // Extract from tuple into the result.
+ auto index = rewriter.getI64IntegerAttr(i);
+ auto tupleGet = rewriter.create<vector::TupleGetOp>(
+ loc, tupleType.getType(i), op.getOperand(), index);
+ result = rewriter.create<vector::InsertStridedSliceOp>(
+ loc, tupleGet, result, elementOffsets, strides);
+ }
+
+ rewriter.replaceOp(op, result);
+ return matchSuccess();
+ }
+};
+
+/// Progressive lowering of OuterProductOp.
+/// One:
+/// %x = vector.outerproduct %lhs, %rhs, %acc
+/// is replaced by:
+/// %z = zero-result
+/// %0 = vector.extract %lhs[0]
+/// %1 = vector.broadcast %0
+/// %2 = vector.extract %acc[0]
+/// %3 = vector.fma %1, %arg1, %2
+/// %4 = vector.insert %3, %z[0]
+/// ..
+/// %x = vector.insert %.., %..[N-1]
+///
+class OuterProductOpLowering : public OpRewritePattern<vector::OuterProductOp> {
+public:
+ using OpRewritePattern<vector::OuterProductOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::OuterProductOp op,
+ PatternRewriter &rewriter) const override {
+ auto loc = op.getLoc();
+
+ VectorType rhsType = op.getOperandVectorTypeRHS();
+ VectorType resType = op.getVectorType();
+ Type eltType = resType.getElementType();
+ Value acc = (op.acc().empty()) ? nullptr : op.acc()[0];
+
+ Value zero = rewriter.create<ConstantOp>(loc, eltType,
+ rewriter.getZeroAttr(eltType));
+ Value result = rewriter.create<SplatOp>(loc, resType, zero);
+ for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) {
+ auto pos = rewriter.getI64ArrayAttr(d);
+ Value x = rewriter.create<vector::ExtractOp>(loc, eltType, op.lhs(), pos);
+ Value b = rewriter.create<vector::BroadcastOp>(loc, rhsType, x);
+ Value m;
+ if (acc) {
+ Value z = rewriter.create<vector::ExtractOp>(loc, rhsType, acc, pos);
+ m = rewriter.create<vector::FMAOp>(loc, b, op.rhs(), z);
+ } else {
+ m = rewriter.create<MulFOp>(loc, b, op.rhs());
+ }
+ result = rewriter.create<vector::InsertOp>(loc, resType, m, result, pos);
+ }
+ rewriter.replaceOp(op, result);
+ return matchSuccess();
+ }
+};
+
+/// Progressive lowering of ContractionOp.
+/// One:
+/// %x = vector.contract with at least one free/batch dimension
+/// is replaced by:
+/// %a = vector.contract with one less free/batch dimension
+/// %b = vector.contract with one less free/batch dimension
+/// ..
+/// %x = combine %a %b ..
+/// until a pure contraction is reached (no free/batch dimensions),
+/// which is replaced by a fma/reduction op.
+///
+/// TODO(ajcbik): break down into transpose/reshape/cast ops
+/// when they become available to avoid code dup
+/// TODO(ajcbik): investigate lowering order impact on performance
+class ContractionOpLowering : public OpRewritePattern<vector::ContractionOp> {
+public:
+ using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ContractionOp op,
+ PatternRewriter &rewriter) const override {
+ // TODO(ajcbik): implement masks
+ if (llvm::size(op.masks()) != 0)
+ return matchFailure();
+
+ // TODO(ntv, ajcbik): implement benefits, cost models, separate this out in
+ // a new pattern.
+ // TODO(ntv, fhahn): once row-major mode is available in LLVM's matrix
+ // intrinsics, use that.
+ if (lowerToLLVMMatrixIntrinsics &&
+ isColumnMajorMatmul(op.indexing_maps())) {
+ VectorType lhsType = op.getLhsType();
+ VectorType rhsType = op.getRhsType();
+ Type flattenedLHSType =
+ VectorType::get(lhsType.getNumElements(), lhsType.getElementType());
+ Type flattenedRHSType =
+ VectorType::get(rhsType.getNumElements(), rhsType.getElementType());
+ auto lhs = rewriter.create<vector::ShapeCastOp>(
+ op.getLoc(), flattenedLHSType, op.lhs());
+ auto rhs = rewriter.create<vector::ShapeCastOp>(
+ op.getLoc(), flattenedRHSType, op.rhs());
+
+ unsigned lhsRows = op.getLhsType().getShape()[0];
+ unsigned lhsColumns = op.getLhsType().getShape()[1];
+ unsigned rhsColumns = op.getRhsType().getShape()[1];
+ Value mul = rewriter.create<vector::MatmulOp>(
+ op.getLoc(), lhs, rhs, lhsRows, lhsColumns, rhsColumns);
+ mul = rewriter.create<vector::ShapeCastOp>(op.getLoc(),
+ op.acc().getType(), mul);
+ Type elementType = op.getLhsType().getElementType();
+ assert(elementType.isIntOrFloat());
+ if (elementType.isa<IntegerType>())
+ rewriter.replaceOpWithNewOp<AddIOp>(op, op.acc(), mul);
+ else
+ rewriter.replaceOpWithNewOp<AddFOp>(op, op.acc(), mul);
+ return matchSuccess();
+ }
+
+ // Find first batch dimension in LHS/RHS, and lower when found.
+ std::vector<std::pair<int64_t, int64_t>> batchDimMap = op.getBatchDimMap();
+ if (!batchDimMap.empty()) {
+ int64_t lhsIndex = batchDimMap[0].first;
+ int64_t rhsIndex = batchDimMap[0].second;
+ rewriter.replaceOp(op, lowerParallel(op, lhsIndex, rhsIndex, rewriter));
+ return matchSuccess();
+ }
+
+ // Collect contracting dimensions.
+ std::vector<std::pair<int64_t, int64_t>> contractingDimMap =
+ op.getContractingDimMap();
+ DenseSet<int64_t> lhsContractingDimSet;
+ DenseSet<int64_t> rhsContractingDimSet;
+ for (auto &dimPair : contractingDimMap) {
+ lhsContractingDimSet.insert(dimPair.first);
+ rhsContractingDimSet.insert(dimPair.second);
+ }
+
+ // Find first free dimension in LHS, and lower when found.
+ VectorType lhsType = op.getLhsType();
+ for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e;
+ ++lhsIndex) {
+ if (lhsContractingDimSet.count(lhsIndex) == 0) {
+ rewriter.replaceOp(
+ op, lowerParallel(op, lhsIndex, /*rhsIndex=*/-1, rewriter));
+ return matchSuccess();
+ }
+ }
+
+ // Find first free dimension in RHS, and lower when found.
+ VectorType rhsType = op.getRhsType();
+ for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e;
+ ++rhsIndex) {
+ if (rhsContractingDimSet.count(rhsIndex) == 0) {
+ rewriter.replaceOp(
+ op, lowerParallel(op, /*lhsIndex=*/-1, rhsIndex, rewriter));
+ return matchSuccess();
+ }
+ }
+
+ // Lower the first remaining reduction dimension.
+ if (!contractingDimMap.empty()) {
+ rewriter.replaceOp(op, lowerReduction(op, rewriter));
+ return matchSuccess();
+ }
+
+ return matchFailure();
+ }
+
+private:
+ // Lower one parallel dimension.
+ // TODO(ajcbik): consider reusing existing contract unrolling
+ Value lowerParallel(vector::ContractionOp op, int64_t lhsIndex,
+ int64_t rhsIndex, PatternRewriter &rewriter) const {
+ VectorType lhsType = op.getLhsType();
+ VectorType rhsType = op.getRhsType();
+ VectorType resType = op.getResultType().cast<VectorType>();
+ // Find the iterator type index and result index.
+ SmallVector<AffineMap, 4> iMap = op.getIndexingMaps();
+ int64_t iterIndex = -1;
+ int64_t dimSize = -1;
+ if (lhsIndex >= 0) {
+ iterIndex =
+ iMap[0].getResult(lhsIndex).cast<AffineDimExpr>().getPosition();
+ assert((rhsIndex < 0 || iterIndex == iMap[1]
+ .getResult(rhsIndex)
+ .cast<AffineDimExpr>()
+ .getPosition()) &&
+ "parallel index should be free in LHS or batch in LHS/RHS");
+ dimSize = lhsType.getDimSize(lhsIndex);
+ } else {
+ assert(rhsIndex >= 0 && "missing parallel index");
+ iterIndex =
+ iMap[1].getResult(rhsIndex).cast<AffineDimExpr>().getPosition();
+ dimSize = rhsType.getDimSize(rhsIndex);
+ }
+ assert(iterIndex >= 0 && "parallel index not listed in operand mapping");
+ Optional<int64_t> lookup = getResultIndex(iMap[2], iterIndex);
+ assert(lookup.hasValue() && "parallel index not listed in reduction");
+ int64_t resIndex = lookup.getValue();
+ // Construct new iterator types and affine map array attribute.
+ SmallVector<AffineMap, 4> lowIndexingMaps;
+ lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
+ lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
+ lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
+ auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
+ auto lowIter =
+ rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));
+ // Unroll into a series of lower dimensional vector.contract ops.
+ Location loc = op.getLoc();
+ Value result = zeroVector(loc, resType, rewriter);
+ for (int64_t d = 0; d < dimSize; ++d) {
+ auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter);
+ auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter);
+ auto acc = reshapeLoad(loc, op.acc(), resType, resIndex, d, rewriter);
+ Value lowContract = rewriter.create<vector::ContractionOp>(
+ loc, lhs, rhs, acc, lowAffine, lowIter);
+ result = reshapeStore(loc, lowContract, result, resType, resIndex, d,
+ rewriter);
+ }
+ return result;
+ }
+
+ // Lower one reduction dimension.
+ Value lowerReduction(vector::ContractionOp op,
+ PatternRewriter &rewriter) const {
+ auto loc = op.getLoc();
+ VectorType lhsType = op.getLhsType();
+ VectorType rhsType = op.getRhsType();
+ Type resType = op.getResultType();
+ assert(!resType.isa<VectorType>());
+ // Use iterator index 0.
+ int64_t iterIndex = 0;
+ SmallVector<AffineMap, 4> iMap = op.getIndexingMaps();
+ Optional<int64_t> lookupLhs = getResultIndex(iMap[0], iterIndex);
+ Optional<int64_t> lookupRhs = getResultIndex(iMap[1], iterIndex);
+ assert(lookupLhs.hasValue() && "missing LHS parallel index");
+ assert(lookupRhs.hasValue() && "missing RHS parallel index");
+ int64_t lhsIndex = lookupLhs.getValue();
+ int64_t rhsIndex = lookupRhs.getValue();
+ int64_t dimSize = lhsType.getDimSize(lhsIndex);
+ assert(dimSize == rhsType.getDimSize(rhsIndex) && "corrupt shape");
+ // Base case.
+ if (lhsType.getRank() == 1) {
+ assert(rhsType.getRank() == 1 && "corrupt contraction");
+ Value zero = zeroVector(loc, lhsType, rewriter);
+ Value fma = rewriter.create<vector::FMAOp>(loc, op.lhs(), op.rhs(), zero);
+ StringAttr kind = rewriter.getStringAttr("add");
+ return rewriter.create<vector::ReductionOp>(loc, resType, kind, fma,
+ op.acc());
+ }
+ // Construct new iterator types and affine map array attribute.
+ SmallVector<AffineMap, 4> lowIndexingMaps;
+ lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
+ lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
+ lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
+ auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
+ auto lowIter =
+ rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));
+ // Unroll into a series of lower dimensional vector.contract ops.
+ // By feeding the initial accumulator into the first contraction,
+ // and the result of each contraction into the next, eventually
+ // the sum of all reductions is computed.
+ Value result = op.acc();
+ for (int64_t d = 0; d < dimSize; ++d) {
+ auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter);
+ auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter);
+ result = rewriter.create<vector::ContractionOp>(loc, lhs, rhs, result,
+ lowAffine, lowIter);
+ }
+ return result;
+ }
+
+ // Helper method to construct a zero vector.
+ static Value zeroVector(Location loc, VectorType vType,
+ PatternRewriter &rewriter) {
+ Type eltType = vType.getElementType();
+ Value zero = rewriter.create<ConstantOp>(loc, eltType,
+ rewriter.getZeroAttr(eltType));
+ return rewriter.create<SplatOp>(loc, vType, zero);
+ }
+
+ // Helper to find an index in an affine map.
+ static Optional<int64_t> getResultIndex(AffineMap map, int64_t index) {
+ for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+ int64_t idx = map.getResult(i).cast<AffineDimExpr>().getPosition();
+ if (idx == index)
+ return i;
+ }
+ return None;
+ }
+
+ // Helper to construct iterator types with one index removed.
+ static SmallVector<Attribute, 4> adjustIter(ArrayAttr iteratorTypes,
+ int64_t index) {
+ SmallVector<Attribute, 4> results;
+ for (auto it : llvm::enumerate(iteratorTypes)) {
+ int64_t idx = it.index();
+ if (idx == index)
+ continue;
+ results.push_back(it.value());
+ }
+ return results;
+ }
+
+ // Helper to construct an affine map with one index removed.
+ static AffineMap adjustMap(AffineMap map, int64_t index,
+ PatternRewriter &rewriter) {
+ auto *ctx = rewriter.getContext();
+ SmallVector<AffineExpr, 4> results;
+ for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+ int64_t idx = map.getResult(i).cast<AffineDimExpr>().getPosition();
+ if (idx == index)
+ continue;
+ // Re-insert remaining indices, but renamed when occurring
+ // after the removed index.
+ auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx);
+ results.push_back(targetExpr);
+ }
+ // The (...) -> () affine map has its own factory method.
+ return results.empty() ? AffineMap::get(map.getNumDims() - 1, 0, ctx)
+ : AffineMap::get(map.getNumDims() - 1, 0, results);
+ }
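+
+ // For example (illustrative): adjustMap((d0, d1, d2) -> (d0, d2),
+ // /*index=*/1) drops the d1 result and renumbers the remaining dims,
+ // yielding (d0, d1) -> (d0, d1).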
+
+ // Helper to drop dimension from vector type.
+ static Type adjustType(VectorType tp, int64_t index) {
+ int64_t rank = tp.getRank();
+ Type eltType = tp.getElementType();
+ if (rank == 1) {
+ assert(index == 0 && "index for scalar result out of bounds");
+ return eltType;
+ }
+ SmallVector<int64_t, 4> adjustedShape;
+ for (int64_t i = 0; i < rank; ++i) {
+ // Omit dimension at the given index.
+ if (i == index)
+ continue;
+ // Otherwise, add dimension back.
+ adjustedShape.push_back(tp.getDimSize(i));
+ }
+ return VectorType::get(adjustedShape, eltType);
+ }
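+
+ // For example (illustrative): adjustType(vector<2x3x4xf32>, /*index=*/1)
+ // drops the middle dimension and returns vector<2x4xf32>, while
+ // adjustType(vector<8xf32>, 0) returns the scalar element type f32.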
+
+ // Helper method to possibly drop a dimension in a load.
+ // TODO(ajcbik): use a reshaping vector load (and share lowering code)
+ static Value reshapeLoad(Location loc, Value val, VectorType type,
+ int64_t index, int64_t pos,
+ PatternRewriter &rewriter) {
+ if (index == -1)
+ return val;
+ Type lowType = adjustType(type, 0);
+ // At extraction dimension?
+ if (index == 0) {
+ auto posAttr = rewriter.getI64ArrayAttr(pos);
+ return rewriter.create<vector::ExtractOp>(loc, lowType, val, posAttr);
+ }
+ // Unroll leading dimensions.
+ VectorType vType = lowType.cast<VectorType>();
+ VectorType resType = adjustType(type, index).cast<VectorType>();
+ Value result = zeroVector(loc, resType, rewriter);
+ for (int64_t d = 0, e = resType.getDimSize(0); d < e; d++) {
+ auto posAttr = rewriter.getI64ArrayAttr(d);
+ Value ext = rewriter.create<vector::ExtractOp>(loc, vType, val, posAttr);
+ Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter);
+ result = rewriter.create<vector::InsertOp>(loc, resType, load, result,
+ posAttr);
+ }
+ return result;
+ }
+
+ // Helper method to possibly drop a dimension in a store.
+ // TODO(ajcbik): use a reshaping vector store (and share lowering code)
+ static Value reshapeStore(Location loc, Value val, Value result,
+ VectorType type, int64_t index, int64_t pos,
+ PatternRewriter &rewriter) {
+ // Unmodified?
+ if (index == -1)
+ return val;
+ // At insertion dimension?
+ if (index == 0) {
+ auto posAttr = rewriter.getI64ArrayAttr(pos);
+ return rewriter.create<vector::InsertOp>(loc, type, val, result, posAttr);
+ }
+ // Unroll leading dimensions.
+ Type lowType = adjustType(type, 0);
+ VectorType vType = lowType.cast<VectorType>();
+ Type insType = adjustType(vType, 0);
+ for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) {
+ auto posAttr = rewriter.getI64ArrayAttr(d);
+ Value ext =
+ rewriter.create<vector::ExtractOp>(loc, vType, result, posAttr);
+ Value ins =
+ rewriter.create<vector::ExtractOp>(loc, insType, val, posAttr);
+ Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter);
+ result =
+ rewriter.create<vector::InsertOp>(loc, type, sto, result, posAttr);
+ }
+ return result;
+ }
+};
+
+/// ShapeCastOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D
+/// vectors progressively on the way to targeting llvm.matrix intrinsics.
+/// This iterates over the most major dimension of the 2-D vector and performs
+/// rewrites into:
+/// vector.extract from 2-D + vector.insert_strided_slice offset into 1-D
+class ShapeCastOp2DDownCastRewritePattern
+ : public OpRewritePattern<vector::ShapeCastOp> {
+public:
+ using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ShapeCastOp op,
+ PatternRewriter &rewriter) const override {
+ auto sourceVectorType = op.getSourceVectorType();
+ auto resultVectorType = op.getResultVectorType();
+ if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1)
+ return matchFailure();
+
+ auto loc = op.getLoc();
+ auto elemType = sourceVectorType.getElementType();
+ Value zero = rewriter.create<ConstantOp>(loc, elemType,
+ rewriter.getZeroAttr(elemType));
+ Value desc = rewriter.create<SplatOp>(loc, resultVectorType, zero);
+ unsigned mostMinorVectorSize = sourceVectorType.getShape()[1];
+ for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) {
+ Value vec = rewriter.create<vector::ExtractOp>(loc, op.source(), i);
+ desc = rewriter.create<vector::InsertStridedSliceOp>(
+ loc, vec, desc,
+ /*offsets=*/i * mostMinorVectorSize, /*strides=*/1);
+ }
+ rewriter.replaceOp(op, desc);
+ return matchSuccess();
+ }
+};
+
+/// ShapeCastOp 1D -> 2D upcast serves the purpose of unflattening 1-D back
+/// into 2-D vectors progressively after targeting llvm.matrix intrinsics.
+/// This iterates over the most major dimension of the 2-D vector and performs
+/// rewrites into:
+/// vector.strided_slice from 1-D + vector.insert into 2-D
+class ShapeCastOp2DUpCastRewritePattern
+ : public OpRewritePattern<vector::ShapeCastOp> {
+public:
+ using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
+
+ PatternMatchResult matchAndRewrite(vector::ShapeCastOp op,
+ PatternRewriter &rewriter) const override {
+ auto sourceVectorType = op.getSourceVectorType();
+ auto resultVectorType = op.getResultVectorType();
+ if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2)
+ return matchFailure();
+
+ auto loc = op.getLoc();
+ auto elemType = sourceVectorType.getElementType();
+ Value zero = rewriter.create<ConstantOp>(loc, elemType,
+ rewriter.getZeroAttr(elemType));
+ Value desc = rewriter.create<SplatOp>(loc, resultVectorType, zero);
+ unsigned mostMinorVectorSize = resultVectorType.getShape()[1];
+ for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) {
+ Value vec = rewriter.create<vector::StridedSliceOp>(
+ loc, op.source(), /*offsets=*/i * mostMinorVectorSize,
+ /*sizes=*/mostMinorVectorSize,
+ /*strides=*/1);
+ desc = rewriter.create<vector::InsertOp>(loc, vec, desc, i);
+ }
+ rewriter.replaceOp(op, desc);
+ return matchSuccess();
+ }
+};
+
+} // namespace
+
+// TODO(andydavis) Add pattern to rewrite ExtractSlices(ConstantMaskOp).
+// TODO(andydavis) Add this as DRR pattern.
+void mlir::vector::populateVectorToVectorTransformationPatterns(
+ OwningRewritePatternList &patterns, MLIRContext *context) {
+ patterns.insert<ShapeCastOpDecomposer, ShapeCastOpFolder, SplitTransferReadOp,
+ SplitTransferWriteOp, TupleGetFolderOp>(context);
+}
+
+void mlir::vector::populateVectorSlicesLoweringPatterns(
+ OwningRewritePatternList &patterns, MLIRContext *context) {
+ patterns.insert<ExtractSlicesOpLowering, InsertSlicesOpLowering>(context);
+}
+
+void mlir::vector::populateVectorContractLoweringPatterns(
+ OwningRewritePatternList &patterns, MLIRContext *context) {
+ patterns.insert<ContractionOpLowering, ShapeCastOp2DDownCastRewritePattern,
+ ShapeCastOp2DUpCastRewritePattern, OuterProductOpLowering>(
+ context);
+}
diff --git a/mlir/lib/Dialect/VectorOps/VectorUtils.cpp b/mlir/lib/Dialect/VectorOps/VectorUtils.cpp
new file mode 100644
index 000000000000..46a990080a4f
--- /dev/null
+++ b/mlir/lib/Dialect/VectorOps/VectorUtils.cpp
@@ -0,0 +1,278 @@
+//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
+//
+// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utility methods for working with the VectorOps dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/VectorOps/VectorUtils.h"
+#include "mlir/Analysis/LoopAnalysis.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/VectorOps/VectorOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/IntegerSet.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Support/Functional.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/MathExtras.h"
+#include "mlir/Support/STLExtras.h"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+
+using llvm::SetVector;
+
+namespace mlir {
+
+SmallVector<int64_t, 4> computeStrides(ArrayRef<int64_t> shape,
+ ArrayRef<int64_t> sizes) {
+ int64_t rank = shape.size();
+ // Compute the count for each dimension.
+ SmallVector<int64_t, 4> sliceDimCounts(rank);
+ for (int64_t r = 0; r < rank; ++r)
+ sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
+ // Use that to compute the slice stride for each dimension.
+ SmallVector<int64_t, 4> sliceStrides(rank);
+ sliceStrides[rank - 1] = 1;
+ for (int64_t r = rank - 2; r >= 0; --r)
+ sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
+ return sliceStrides;
+}
+
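+// For example (illustrative): shape = [4, 6, 8] with sizes = [2, 3, 4] gives
+// per-dimension slice counts [2, 2, 2] and slice strides [4, 2, 1]; with
+// those strides, delinearize(strides, 5) below recovers the vector offsets
+// [1, 0, 1].
+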
+SmallVector<int64_t, 4> delinearize(ArrayRef<int64_t> sliceStrides,
+ int64_t index) {
+ int64_t rank = sliceStrides.size();
+ SmallVector<int64_t, 4> vectorOffsets(rank);
+ for (int64_t r = 0; r < rank; ++r) {
+ assert(sliceStrides[r] > 0);
+ vectorOffsets[r] = index / sliceStrides[r];
+ index %= sliceStrides[r];
+ }
+ return vectorOffsets;
+}
+
+SmallVector<int64_t, 4>
+computeElementOffsetsFromVectorSliceOffsets(ArrayRef<int64_t> sizes,
+ ArrayRef<int64_t> vectorOffsets) {
+ return functional::zipMap([](int64_t v1, int64_t v2) { return v1 * v2; },
+ vectorOffsets, sizes);
+}
+
+SmallVector<int64_t, 4> computeSliceSizes(ArrayRef<int64_t> shape,
+ ArrayRef<int64_t> sizes,
+ ArrayRef<int64_t> elementOffsets) {
+ int64_t rank = shape.size();
+ SmallVector<int64_t, 4> sliceSizes(rank);
+ for (unsigned r = 0; r < rank; ++r)
+ sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
+ return sliceSizes;
+}
+
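+// For example (illustrative): shape = [5, 4], sizes = [2, 2] and
+// elementOffsets = [4, 0] clamp the trailing slice to sliceSizes = [1, 2].
+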
+Optional<SmallVector<int64_t, 4>> shapeRatio(ArrayRef<int64_t> superShape,
+ ArrayRef<int64_t> subShape) {
+ if (superShape.size() < subShape.size()) {
+ return Optional<SmallVector<int64_t, 4>>();
+ }
+
+ // Starting from the end, compute the integer divisors.
+ // Clear the boolean `divides` if integral division is not possible.
+ std::vector<int64_t> result;
+ result.reserve(superShape.size());
+ bool divides = true;
+ auto divide = [&divides, &result](int superSize, int subSize) {
+ assert(superSize > 0 && "superSize must be > 0");
+ assert(subSize > 0 && "subSize must be > 0");
+ divides &= (superSize % subSize == 0);
+ result.push_back(superSize / subSize);
+ };
+ functional::zipApply(
+ divide, SmallVector<int64_t, 8>{superShape.rbegin(), superShape.rend()},
+ SmallVector<int64_t, 8>{subShape.rbegin(), subShape.rend()});
+
+ // If integral division does not occur, return and let the caller decide.
+ if (!divides) {
+ return None;
+ }
+
+ // At this point we computed the ratio (in reverse) for the common
+ // size. Fill with the remaining entries from the super-vector shape (still in
+ // reverse).
+ int commonSize = subShape.size();
+ std::copy(superShape.rbegin() + commonSize, superShape.rend(),
+ std::back_inserter(result));
+
+ assert(result.size() == superShape.size() &&
+ "super to sub shape ratio is not of the same size as the super rank");
+
+ // Reverse again to get it back in the proper order and return.
+ return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
+}
+
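+// For example (illustrative): shapeRatio([4, 8, 16], [2, 4]) returns
+// [4, 4, 4] (16/4 and 8/2 for the common suffix, with the leading super
+// dimension copied through), while shapeRatio([4, 8, 15], [2, 4]) returns
+// None since 15 is not a multiple of 4.
+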
+Optional<SmallVector<int64_t, 4>> shapeRatio(VectorType superVectorType,
+ VectorType subVectorType) {
+ assert(superVectorType.getElementType() == subVectorType.getElementType() &&
+ "vector types must be of the same elemental type");
+ return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
+}
+
+/// Constructs a permutation map from memref indices to vector dimension.
+///
+/// The implementation uses the knowledge of the mapping of enclosing loop to
+/// vector dimension. `enclosingLoopToVectorDim` carries this information as a
+/// map with:
+/// - keys representing "vectorized enclosing loops";
+/// - values representing the corresponding vector dimension.
+/// The algorithm traverses "vectorized enclosing loops" and extracts the
+/// at-most-one MemRef index that varies along said loop. This index is
+/// guaranteed to be at most one by construction: otherwise the MemRef is not
+/// vectorizable.
+/// If such a varying index is found, it is added to the permutation_map at the
+/// proper vector dimension.
+/// If no index is found to vary along the loop, 0 is added to the
+/// permutation_map instead, which corresponds to a vector broadcast along that
+/// dimension.
+///
+/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
+/// signalling that no permutation map can be constructed given
+/// `enclosingLoopToVectorDim`.
+///
+/// Examples can be found in the documentation of `makePermutationMap`, in the
+/// header file.
+static AffineMap makePermutationMap(
+ ArrayRef<Value> indices,
+ const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
+ if (enclosingLoopToVectorDim.empty())
+ return AffineMap();
+ MLIRContext *context =
+ enclosingLoopToVectorDim.begin()->getFirst()->getContext();
+ using functional::makePtrDynCaster;
+ using functional::map;
+ SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
+ getAffineConstantExpr(0, context));
+
+ for (auto kvp : enclosingLoopToVectorDim) {
+ assert(kvp.second < perm.size());
+ auto invariants = getInvariantAccesses(
+ cast<AffineForOp>(kvp.first).getInductionVar(), indices);
+ unsigned numIndices = indices.size();
+ unsigned countInvariantIndices = 0;
+ for (unsigned dim = 0; dim < numIndices; ++dim) {
+ if (!invariants.count(indices[dim])) {
+ assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
+ "permutationMap already has an entry along dim");
+ perm[kvp.second] = getAffineDimExpr(dim, context);
+ } else {
+ ++countInvariantIndices;
+ }
+ }
+ assert((countInvariantIndices == numIndices ||
+ countInvariantIndices == numIndices - 1) &&
+ "Vectorization prerequisite violated: at most 1 index may be "
+ "invariant wrt a vectorized loop");
+ }
+ return AffineMap::get(indices.size(), 0, perm);
+}
+
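+// For example (illustrative): for memref indices (%c0, %i) where the single
+// vectorized enclosing loop over %i maps to vector dimension 0, %c0 is
+// invariant and %i varies, so the resulting map is (d0, d1) -> (d1).
+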
+/// Implementation detail that walks up the parents and records the ones with
+/// the specified type.
+/// TODO(ntv): could also be implemented as a collect parents followed by a
+/// filter and made available outside this file.
+template <typename T>
+static SetVector<Operation *> getParentsOfType(Operation *op) {
+ SetVector<Operation *> res;
+ auto *current = op;
+ while (auto *parent = current->getParentOp()) {
+ if (auto typedParent = dyn_cast<T>(parent)) {
+ assert(res.count(parent) == 0 && "Already inserted");
+ res.insert(parent);
+ }
+ current = parent;
+ }
+ return res;
+}
+
+/// Returns the enclosing AffineForOps, from closest to farthest.
+static SetVector<Operation *> getEnclosingforOps(Operation *op) {
+ return getParentsOfType<AffineForOp>(op);
+}
+
+AffineMap
+makePermutationMap(Operation *op, ArrayRef<Value> indices,
+ const DenseMap<Operation *, unsigned> &loopToVectorDim) {
+ DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
+ auto enclosingLoops = getEnclosingforOps(op);
+ for (auto *forInst : enclosingLoops) {
+ auto it = loopToVectorDim.find(forInst);
+ if (it != loopToVectorDim.end()) {
+ enclosingLoopToVectorDim.insert(*it);
+ }
+ }
+ return makePermutationMap(indices, enclosingLoopToVectorDim);
+}
+
+bool matcher::operatesOnSuperVectorsOf(Operation &op,
+ VectorType subVectorType) {
+ // First, extract the vector type and distinguish between:
+ // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
+ // vector.transfer_write); and
+ // b. ops that *may* lower a super-vector (all other ops).
+ // The ops that *may* lower a super-vector only do so if the super-vector to
+ // sub-vector ratio exists. The ops that *must* lower a super-vector are
+ // explicitly checked for this property.
+ /// TODO(ntv): there should be a single function for all ops to do this so we
+ /// do not have to special case. Maybe a trait, or just a method, unclear atm.
+ bool mustDivide = false;
+ (void)mustDivide;
+ VectorType superVectorType;
+ if (auto read = dyn_cast<vector::TransferReadOp>(op)) {
+ superVectorType = read.getVectorType();
+ mustDivide = true;
+ } else if (auto write = dyn_cast<vector::TransferWriteOp>(op)) {
+ superVectorType = write.getVectorType();
+ mustDivide = true;
+ } else if (op.getNumResults() == 0) {
+ if (!isa<ReturnOp>(op)) {
+ op.emitError("NYI: assuming only return operations can have 0 "
+ " results at this point");
+ }
+ return false;
+ } else if (op.getNumResults() == 1) {
+ if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
+ superVectorType = v;
+ } else {
+ // Not a vector type.
+ return false;
+ }
+ } else {
+ // Not a vector.transfer and has more than 1 result, fail hard for now to
+ // wake us up when something changes.
+ op.emitError("NYI: operation has more than 1 result");
+ return false;
+ }
+
+ // Get the ratio.
+ auto ratio = shapeRatio(superVectorType, subVectorType);
+
+ // Sanity check.
+ assert((ratio.hasValue() || !mustDivide) &&
+ "vector.transfer operation in which super-vector size is not an"
+ " integer multiple of sub-vector size");
+
+ // This catches cases that are not strictly necessary to have multiplicity
+ // but still aren't divisible by the sub-vector shape.
+ // This could be useful information if we wanted to reshape at the level of
+ // the vector type (but we would have to look at the compute and distinguish
+ // between parallel, reduction, and possibly other cases).
+ if (!ratio.hasValue()) {
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace mlir
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 6d1d7b41d568..ac2bafb39e3a 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,8 +1,6 @@
add_subdirectory(Utils)
add_mlir_library(MLIRTransforms
- AffineDataCopyGeneration.cpp
- AffineLoopInvariantCodeMotion.cpp
Canonicalizer.cpp
CSE.cpp
DialectConversion.cpp
@@ -17,7 +15,6 @@ add_mlir_library(MLIRTransforms
MemRefDataFlowOpt.cpp
OpStats.cpp
PipelineDataTransfer.cpp
- SimplifyAffineStructures.cpp
StripDebugInfo.cpp
SymbolDCE.cpp
Vectorize.cpp
@@ -33,7 +30,7 @@ add_mlir_library(MLIRTransforms
target_link_libraries(MLIRTransforms
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRAnalysis
MLIRLoopLikeInterface
MLIRLoopOps
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 378e91a214bd..2f08f95261f2 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -14,7 +14,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp
index 0c411144df9c..49bcc5897c48 100644
--- a/mlir/lib/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Transforms/LoopTiling.cpp
@@ -14,7 +14,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp
index 2083a1226879..ca1ed7729345 100644
--- a/mlir/lib/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Transforms/LoopUnroll.cpp
@@ -13,7 +13,7 @@
#include "mlir/Transforms/Passes.h"
#include "mlir/Analysis/LoopAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
index 4629e2c94bd4..7d4be3490114 100644
--- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp
@@ -35,7 +35,7 @@
#include "mlir/Transforms/Passes.h"
#include "mlir/Analysis/LoopAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
index c1128c949baf..eaf5c744723c 100644
--- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
+++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
@@ -16,7 +16,7 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Dominance.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Passes.h"
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index 39874b1bc44a..df7bafc4b90b 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -15,7 +15,7 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt
index 1e0442179bf4..e28a97c18231 100644
--- a/mlir/lib/Transforms/Utils/CMakeLists.txt
+++ b/mlir/lib/Transforms/Utils/CMakeLists.txt
@@ -16,7 +16,7 @@ add_mlir_library(MLIRTransformUtils
target_link_libraries(MLIRTransformUtils
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRAnalysis
MLIRLoopAnalysis
MLIRLoopOps
diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
index 47a3fcf3d519..9ed4283101af 100644
--- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
@@ -16,7 +16,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 96b4e72eff48..4f6f2b748fec 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -16,7 +16,7 @@
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
index 844befecea36..4ab773eb38df 100644
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ b/mlir/lib/Transforms/Utils/Utils.cpp
@@ -18,7 +18,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Dominance.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp
index 75a7d4d5cf6b..b89702b50188 100644
--- a/mlir/lib/Transforms/Vectorize.cpp
+++ b/mlir/lib/Transforms/Vectorize.cpp
@@ -15,7 +15,7 @@
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
diff --git a/mlir/test/Dialect/AffineOps/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/canonicalize.mlir
rename to mlir/test/Dialect/Affine/canonicalize.mlir
diff --git a/mlir/test/Dialect/AffineOps/dma.mlir b/mlir/test/Dialect/Affine/dma.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/dma.mlir
rename to mlir/test/Dialect/Affine/dma.mlir
diff --git a/mlir/test/Dialect/AffineOps/inlining.mlir b/mlir/test/Dialect/Affine/inlining.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/inlining.mlir
rename to mlir/test/Dialect/Affine/inlining.mlir
diff --git a/mlir/test/Dialect/AffineOps/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/invalid.mlir
rename to mlir/test/Dialect/Affine/invalid.mlir
diff --git a/mlir/test/Dialect/AffineOps/load-store-invalid.mlir b/mlir/test/Dialect/Affine/load-store-invalid.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/load-store-invalid.mlir
rename to mlir/test/Dialect/Affine/load-store-invalid.mlir
diff --git a/mlir/test/Dialect/AffineOps/load-store.mlir b/mlir/test/Dialect/Affine/load-store.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/load-store.mlir
rename to mlir/test/Dialect/Affine/load-store.mlir
diff --git a/mlir/test/Dialect/AffineOps/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/memref-stride-calculation.mlir
rename to mlir/test/Dialect/Affine/memref-stride-calculation.mlir
diff --git a/mlir/test/Dialect/AffineOps/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
similarity index 100%
rename from mlir/test/Dialect/AffineOps/ops.mlir
rename to mlir/test/Dialect/Affine/ops.mlir
diff --git a/mlir/test/EDSC/CMakeLists.txt b/mlir/test/EDSC/CMakeLists.txt
index c3c0cf35497b..6c2f5f9fd0be 100644
--- a/mlir/test/EDSC/CMakeLists.txt
+++ b/mlir/test/EDSC/CMakeLists.txt
@@ -6,7 +6,7 @@ llvm_update_compile_flags(mlir-edsc-builder-api-test)
target_link_libraries(mlir-edsc-builder-api-test
PRIVATE
- MLIRAffineOps
+ MLIRAffine
MLIREDSC
MLIRIR
MLIRLinalgEDSC
@@ -22,7 +22,7 @@ target_link_libraries(mlir-edsc-builder-api-test
target_include_directories(mlir-edsc-builder-api-test PRIVATE ..)
whole_archive_link(mlir-edsc-builder-api-test
- MLIRAffineOps
+ MLIRAffine
MLIRLinalgOps
MLIRLoopOps
MLIRStandardOps
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
index b60d5894df03..a9bb0b62dbdf 100644
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -8,7 +8,7 @@
// RUN: mlir-edsc-builder-api-test | FileCheck %s -dump-input-on-failure
-#include "mlir/Dialect/AffineOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/LoopOps/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
@@ -38,7 +38,7 @@ using namespace mlir::edsc::intrinsics;
static MLIRContext &globalContext() {
static bool init_once = []() {
- registerDialect<AffineOpsDialect>();
+ registerDialect<AffineDialect>();
registerDialect<linalg::LinalgDialect>();
registerDialect<loop::LoopOpsDialect>();
registerDialect<StandardOpsDialect>();
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
new file mode 100644
index 000000000000..905d1c1adf3d
--- /dev/null
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_llvm_library(MLIRAffineTransformsTestPasses
+ TestAffineDataCopy.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR
+ )
+
+target_link_libraries(MLIRAffineTransformsTestPasses PRIVATE
+ MLIRIR
+ MLIRPass
+ MLIRAffineTransforms
+ MLIRSupport
+ )
diff --git a/mlir/test/lib/Transforms/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
similarity index 98%
rename from mlir/test/lib/Transforms/TestAffineDataCopy.cpp
rename to mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 966df287359a..af380d8ca212 100644
--- a/mlir/test/lib/Transforms/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -13,7 +13,7 @@
#include "mlir/Analysis/Passes.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt
index cc1766c6127a..160fe9c203da 100644
--- a/mlir/test/lib/Dialect/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/CMakeLists.txt
@@ -1 +1,2 @@
+add_subdirectory(Affine)
add_subdirectory(SPIRV)
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 61d1443869a9..e90099203b43 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(MLIRTestTransforms
- TestAffineDataCopy.cpp
TestAllReduceLowering.cpp
TestCallGraph.cpp
TestConstantFold.cpp
@@ -36,7 +35,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms)
target_link_libraries(MLIRTestTransforms
PUBLIC
- MLIRAffineOps
+ MLIRAffine
MLIRAnalysis
MLIREDSC
MLIRGPU
diff --git a/mlir/test/lib/Transforms/TestConstantFold.cpp b/mlir/test/lib/Transforms/TestConstantFold.cpp
index f45b0ae18d50..cc6ece7f7c46 100644
--- a/mlir/test/lib/Transforms/TestConstantFold.cpp
+++ b/mlir/test/lib/Transforms/TestConstantFold.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Transforms/TestLoopFusion.cpp
index d650288836d1..8860b4adfce7 100644
--- a/mlir/test/lib/Transforms/TestLoopFusion.cpp
+++ b/mlir/test/lib/Transforms/TestLoopFusion.cpp
@@ -14,7 +14,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Passes.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
diff --git a/mlir/test/lib/Transforms/TestMemRefBoundCheck.cpp b/mlir/test/lib/Transforms/TestMemRefBoundCheck.cpp
index e107bf81cfe2..a30631f139c1 100644
--- a/mlir/test/lib/Transforms/TestMemRefBoundCheck.cpp
+++ b/mlir/test/lib/Transforms/TestMemRefBoundCheck.cpp
@@ -16,7 +16,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Passes.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
diff --git a/mlir/test/lib/Transforms/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Transforms/TestMemRefDependenceCheck.cpp
index e2d0c873f959..3c80f5baf348 100644
--- a/mlir/test/lib/Transforms/TestMemRefDependenceCheck.cpp
+++ b/mlir/test/lib/Transforms/TestMemRefDependenceCheck.cpp
@@ -14,7 +14,7 @@
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Passes.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
diff --git a/mlir/test/lib/Transforms/TestParallelismDetection.cpp b/mlir/test/lib/Transforms/TestParallelismDetection.cpp
index 7c16a259723f..1041c78276b3 100644
--- a/mlir/test/lib/Transforms/TestParallelismDetection.cpp
+++ b/mlir/test/lib/Transforms/TestParallelismDetection.cpp
@@ -12,7 +12,7 @@
#include "mlir/Analysis/Passes.h"
#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
diff --git a/mlir/test/lib/Transforms/TestVectorizationUtils.cpp b/mlir/test/lib/Transforms/TestVectorizationUtils.cpp
index 4ae4509bc56d..af42c90521d4 100644
--- a/mlir/test/lib/Transforms/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Transforms/TestVectorizationUtils.cpp
@@ -13,7 +13,7 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Diagnostics.h"
diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index 9ade33aceabe..2ae36d466dd4 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LIBS
${conversion_libs}
MLIRLoopOpsTransforms
MLIRLoopAnalysis
+ MLIRAffineTransformsTestPasses
MLIRAnalysis
MLIRDialect
MLIREDSC