[Mlir-commits] [mlir] 1a86559 - [mlir][VectorToGPU] Add conversion for scf::For op with Matrix operands

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Jun 24 15:44:36 PDT 2021


Author: thomasraoux
Date: 2021-06-24T15:42:28-07:00
New Revision: 1a86559276411d01c8b5fc381c16d9265da4e54a

URL: https://github.com/llvm/llvm-project/commit/1a86559276411d01c8b5fc381c16d9265da4e54a
DIFF: https://github.com/llvm/llvm-project/commit/1a86559276411d01c8b5fc381c16d9265da4e54a.diff

LOG: [mlir][VectorToGPU] Add conversion for scf::For op with Matrix operands

Differential Revision: https://reviews.llvm.org/D104134

Added: 
    

Modified: 
    mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
    mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
index 0fc7944d5df73..869301fe260bb 100644
--- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
+++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -123,6 +124,8 @@ static bool constantSupportsMMAMatrixType(ConstantOp constantOp) {
 }
 
 static bool supportsMMaMatrixType(Operation *op) {
+  if (isa<scf::ForOp, scf::YieldOp>(op))
+    return true;
   if (auto transferRead = dyn_cast<vector::TransferReadOp>(op))
     return transferReadSupportsMMAMatrixType(transferRead);
   if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(op))
@@ -326,6 +329,74 @@ static void convertConstantOp(ConstantOp op,
   valueMapping[op.getResult()] = matrix;
 }
 
+// Replace ForOp with a new ForOp with extra operands. The YieldOp is not
+// updated and needs to be updated separately for the loop to be correct.
+static scf::ForOp replaceForOpWithNewSignature(OpBuilder &b, scf::ForOp loop,
+                                               ValueRange newIterOperands) {
+  // Create a new loop before the existing one, with the extra operands.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(loop);
+  auto operands = llvm::to_vector<4>(loop.getIterOperands());
+  operands.append(newIterOperands.begin(), newIterOperands.end());
+  scf::ForOp newLoop =
+      b.create<scf::ForOp>(loop.getLoc(), loop.lowerBound(), loop.upperBound(),
+                           loop.step(), operands);
+  newLoop.getBody()->erase();
+  newLoop.getLoopBody().getBlocks().splice(
+      newLoop.getLoopBody().getBlocks().begin(),
+      loop.getLoopBody().getBlocks());
+  for (auto operand : newIterOperands)
+    newLoop.getBody()->addArgument(operand.getType());
+
+  for (auto it : llvm::zip(loop.getResults(), newLoop.getResults().take_front(
+                                                  loop.getNumResults())))
+    std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
+  loop.erase();
+  return newLoop;
+}
+
+static void convertForOp(scf::ForOp op,
+                         llvm::DenseMap<Value, Value> &valueMapping) {
+  SmallVector<Value> newOperands;
+  SmallVector<std::pair<size_t, size_t>> argMapping;
+  for (auto operand : llvm::enumerate(op.getIterOperands())) {
+    auto it = valueMapping.find(operand.value());
+    if (it == valueMapping.end())
+      continue;
+    argMapping.push_back(std::make_pair(
+        operand.index(), op.getNumIterOperands() + newOperands.size()));
+    newOperands.push_back(it->second);
+  }
+  OpBuilder b(op);
+  scf::ForOp newForOp = replaceForOpWithNewSignature(b, op, newOperands);
+  Block &loopBody = *newForOp.getBody();
+  for (auto mapping : argMapping) {
+    valueMapping[newForOp.getResult(mapping.first)] =
+        newForOp.getResult(mapping.second);
+    valueMapping[loopBody.getArgument(mapping.first +
+                                      newForOp.getNumInductionVars())] =
+        loopBody.getArgument(mapping.second + newForOp.getNumInductionVars());
+  }
+}
+
+static void convertYieldOp(scf::YieldOp op,
+                           llvm::DenseMap<Value, Value> &valueMapping) {
+  OpBuilder b(op);
+  auto loop = cast<scf::ForOp>(op->getParentOp());
+  auto yieldOperands = llvm::to_vector<4>(op.getOperands());
+  for (auto operand : llvm::enumerate(op.getOperands())) {
+    auto it = valueMapping.find(operand.value());
+    if (it == valueMapping.end())
+      continue;
+    // Replace the yield of old value with the for op argument to make it easier
+    // to remove the dead code.
+    yieldOperands[operand.index()] = loop.getIterOperands()[operand.index()];
+    yieldOperands.push_back(it->second);
+  }
+  b.create<scf::YieldOp>(op.getLoc(), yieldOperands);
+  op.erase();
+}
+
 namespace mlir {
 
 void populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns) {
@@ -345,6 +416,10 @@ void convertVectorToMMAOps(FuncOp funcOp) {
       convertContractOp(contractOp, valueMapping);
     } else if (auto constantOp = dyn_cast<ConstantOp>(op)) {
       convertConstantOp(constantOp, valueMapping);
+    } else if (auto forOp = dyn_cast<scf::ForOp>(op)) {
+      convertForOp(forOp, valueMapping);
+    } else if (auto yiledOp = dyn_cast<scf::YieldOp>(op)) {
+      convertYieldOp(yiledOp, valueMapping);
     }
   }
 }

diff  --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
index d0b7d68e8c829..a7fa5796efc58 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
@@ -41,9 +41,15 @@ func @matmul_cst(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memr
   return
 }
 
-// Negative test until scf.for support is added.
 // CHECK-LABEL: func @matmul_loop
-//       CHECK:   vector.contract
+//       CHECK:   %[[C:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
+//       CHECK:   %[[ACC:.+]] = scf.for {{.*}} iter_args(%[[ACC1:.+]] = %[[C]]) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
+//   CHECK-DAG:     %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
+//   CHECK-DAG:     %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
+//  CHECK-NEXT:     %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[ACC1]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
+//  CHECK-NEXT:     scf.yield %[[D]] : !gpu.mma_matrix<16x16xf16, "COp">
+//  CHECK-NEXT:   }
+//  CHECK-NEXT:   gpu.subgroup_mma_store_matrix %[[ACC]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<128x128xf16>
 func @matmul_loop(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
   %c0 = constant 0 : index
   %c128 = constant 128 : index


        


More information about the Mlir-commits mailing list