[Mlir-commits] [mlir] 1a86559 - [mlir][VectorToGPU] Add conversion for scf::For op with Matrix operands
llvmlistbot at llvm.org
Thu Jun 24 15:44:36 PDT 2021
Author: thomasraoux
Date: 2021-06-24T15:42:28-07:00
New Revision: 1a86559276411d01c8b5fc381c16d9265da4e54a
URL: https://github.com/llvm/llvm-project/commit/1a86559276411d01c8b5fc381c16d9265da4e54a
DIFF: https://github.com/llvm/llvm-project/commit/1a86559276411d01c8b5fc381c16d9265da4e54a.diff
LOG: [mlir][VectorToGPU] Add conversion for scf::For op with Matrix operands
Differential Revision: https://reviews.llvm.org/D104134
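The new support targets matmul kernels where the accumulator is carried across iterations of an scf.for through iter_args. A minimal sketch of the kind of input the pass can now handle follows; this is illustrative only, and the function name, indexing maps, bounds, and attribute details are assumptions rather than the exact test body:

  #map_a = affine_map<(d0, d1, d2) -> (d0, d2)>
  #map_b = affine_map<(d0, d1, d2) -> (d2, d1)>
  #map_c = affine_map<(d0, d1, d2) -> (d0, d1)>

  // Illustrative input: a 16x16 tile matmul accumulated across the K loop.
  func @matmul_loop_sketch(%a: memref<128x128xf16>, %b: memref<128x128xf16>,
                           %c: memref<128x128xf16>) {
    %c0 = constant 0 : index
    %c16 = constant 16 : index
    %c128 = constant 128 : index
    %f0 = constant 0.0 : f16
    %init = vector.transfer_read %c[%c0, %c0], %f0
        : memref<128x128xf16>, vector<16x16xf16>
    %acc = scf.for %k = %c0 to %c128 step %c16
        iter_args(%iter = %init) -> (vector<16x16xf16>) {
      %lhs = vector.transfer_read %a[%c0, %k], %f0
          : memref<128x128xf16>, vector<16x16xf16>
      %rhs = vector.transfer_read %b[%k, %c0], %f0
          : memref<128x128xf16>, vector<16x16xf16>
      %d = vector.contract {indexing_maps = [#map_a, #map_b, #map_c],
                            iterator_types = ["parallel", "parallel", "reduction"]}
          %lhs, %rhs, %iter
          : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
      scf.yield %d : vector<16x16xf16>
    }
    vector.transfer_write %acc, %c[%c0, %c0]
        : vector<16x16xf16>, memref<128x128xf16>
    return
  }

The transfer reads map to gpu.subgroup_mma_load_matrix, the vector.contract maps to gpu.subgroup_mma_compute, and the loop itself is rewritten so the accumulator is carried as a !gpu.mma_matrix value, as the new CHECK lines in the test below verify.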
Added:

Modified:
    mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
    mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir

Removed:
################################################################################
diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
index 0fc7944d5df73..869301fe260bb 100644
--- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
+++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
@@ -18,6 +18,7 @@
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
@@ -123,6 +124,8 @@ static bool constantSupportsMMAMatrixType(ConstantOp constantOp) {
 }
 
 static bool supportsMMaMatrixType(Operation *op) {
+  if (isa<scf::ForOp, scf::YieldOp>(op))
+    return true;
   if (auto transferRead = dyn_cast<vector::TransferReadOp>(op))
     return transferReadSupportsMMAMatrixType(transferRead);
   if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(op))
@@ -326,6 +329,74 @@ static void convertConstantOp(ConstantOp op,
   valueMapping[op.getResult()] = matrix;
 }
 
+// Replace the ForOp with a new ForOp that has extra operands. The YieldOp is
+// not updated here; it must be updated separately for the loop to be correct.
+static scf::ForOp replaceForOpWithNewSignature(OpBuilder &b, scf::ForOp loop,
+                                               ValueRange newIterOperands) {
+  // Create a new loop before the existing one, with the extra operands.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(loop);
+  auto operands = llvm::to_vector<4>(loop.getIterOperands());
+  operands.append(newIterOperands.begin(), newIterOperands.end());
+  scf::ForOp newLoop =
+      b.create<scf::ForOp>(loop.getLoc(), loop.lowerBound(), loop.upperBound(),
+                           loop.step(), operands);
+  newLoop.getBody()->erase();
+  newLoop.getLoopBody().getBlocks().splice(
+      newLoop.getLoopBody().getBlocks().begin(),
+      loop.getLoopBody().getBlocks());
+  for (auto operand : newIterOperands)
+    newLoop.getBody()->addArgument(operand.getType());
+
+  for (auto it : llvm::zip(loop.getResults(), newLoop.getResults().take_front(
+                                                  loop.getNumResults())))
+    std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
+  loop.erase();
+  return newLoop;
+}
+
+static void convertForOp(scf::ForOp op,
+                         llvm::DenseMap<Value, Value> &valueMapping) {
+  SmallVector<Value> newOperands;
+  SmallVector<std::pair<size_t, size_t>> argMapping;
+  for (auto operand : llvm::enumerate(op.getIterOperands())) {
+    auto it = valueMapping.find(operand.value());
+    if (it == valueMapping.end())
+      continue;
+    argMapping.push_back(std::make_pair(
+        operand.index(), op.getNumIterOperands() + newOperands.size()));
+    newOperands.push_back(it->second);
+  }
+  OpBuilder b(op);
+  scf::ForOp newForOp = replaceForOpWithNewSignature(b, op, newOperands);
+  Block &loopBody = *newForOp.getBody();
+  for (auto mapping : argMapping) {
+    valueMapping[newForOp.getResult(mapping.first)] =
+        newForOp.getResult(mapping.second);
+    valueMapping[loopBody.getArgument(mapping.first +
+                                      newForOp.getNumInductionVars())] =
+        loopBody.getArgument(mapping.second + newForOp.getNumInductionVars());
+  }
+}
+
+static void convertYieldOp(scf::YieldOp op,
+                           llvm::DenseMap<Value, Value> &valueMapping) {
+  OpBuilder b(op);
+  auto loop = cast<scf::ForOp>(op->getParentOp());
+  auto yieldOperands = llvm::to_vector<4>(op.getOperands());
+  for (auto operand : llvm::enumerate(op.getOperands())) {
+    auto it = valueMapping.find(operand.value());
+    if (it == valueMapping.end())
+      continue;
+    // Replace the yielded old value with the loop's matching init operand so
+    // that the now-dead vector code is easy to remove later.
+    yieldOperands[operand.index()] = loop.getIterOperands()[operand.index()];
+    yieldOperands.push_back(it->second);
+  }
+  b.create<scf::YieldOp>(op.getLoc(), yieldOperands);
+  op.erase();
+}
+
 namespace mlir {
 void populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns) {
@@ -345,6 +416,10 @@ void convertVectorToMMAOps(FuncOp funcOp) {
       convertContractOp(contractOp, valueMapping);
     } else if (auto constantOp = dyn_cast<ConstantOp>(op)) {
       convertConstantOp(constantOp, valueMapping);
+    } else if (auto forOp = dyn_cast<scf::ForOp>(op)) {
+      convertForOp(forOp, valueMapping);
+    } else if (auto yieldOp = dyn_cast<scf::YieldOp>(op)) {
+      convertYieldOp(yieldOp, valueMapping);
     }
   }
 }
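The loop rewrite itself is mechanical: convertForOp collects the iter operands that already have an MMA mapping, appends the mapped values as extra iter_args through replaceForOpWithNewSignature, and records the mapping for the new block arguments and results; convertYieldOp then re-yields the loop's original init operand in the old position and appends the MMA value. Schematically, on a toy loop (a hand-written sketch with hypothetical values %vinit and %minit, not actual pass output):

  func @signature_rewrite_sketch(%vinit: vector<16x16xf16>,
                                 %minit: !gpu.mma_matrix<16x16xf16, "COp">) {
    %c0 = constant 0 : index
    %c16 = constant 16 : index
    %c128 = constant 128 : index
    // Before the rewrite the loop carried only the vector accumulator:
    //   %res = scf.for ... iter_args(%v = %vinit) -> (vector<16x16xf16>)
    // After convertForOp and convertYieldOp it carries both values:
    %res:2 = scf.for %i = %c0 to %c128 step %c16
        iter_args(%v = %vinit, %m = %minit)
        -> (vector<16x16xf16>, !gpu.mma_matrix<16x16xf16, "COp">) {
      // The old position now yields the loop's own init value, so the vector
      // half of the loop is trivially dead and a later cleanup can fold it
      // away; the MMA accumulator is appended as a new yielded value.
      scf.yield %vinit, %m : vector<16x16xf16>, !gpu.mma_matrix<16x16xf16, "COp">
    }
    return
  }

Downstream conversions then find %res#1 and the new block argument %m through valueMapping, so the MMA computation threads through the loop while the original vector results stay alive only until dead-code cleanup.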
diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
index d0b7d68e8c829..a7fa5796efc58 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
@@ -41,9 +41,15 @@ func @matmul_cst(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memr
   return
 }
 
-// Negative test until scf.for support is added.
 // CHECK-LABEL: func @matmul_loop
-// CHECK: vector.contract
+// CHECK: %[[C:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
+// CHECK: %[[ACC:.+]] = scf.for {{.*}} iter_args(%[[ACC1:.+]] = %[[C]]) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
+// CHECK-DAG: %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
+// CHECK-DAG: %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
+// CHECK-NEXT: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[ACC1]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
+// CHECK-NEXT: scf.yield %[[D]] : !gpu.mma_matrix<16x16xf16, "COp">
+// CHECK-NEXT: }
+// CHECK-NEXT: gpu.subgroup_mma_store_matrix %[[ACC]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<128x128xf16>
 func @matmul_loop(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
   %c0 = constant 0 : index
   %c128 = constant 128 : index