[Mlir-commits] [mlir] d6ef3d2 - [mlir] Remove VectorToROCDL

Krzysztof Drewniak llvmlistbot at llvm.org
Tue Jul 12 08:21:28 PDT 2022


Author: Krzysztof Drewniak
Date: 2022-07-12T15:21:22Z
New Revision: d6ef3d20b4e3768dc30fb229dfa938d8059fffef

URL: https://github.com/llvm/llvm-project/commit/d6ef3d20b4e3768dc30fb229dfa938d8059fffef
DIFF: https://github.com/llvm/llvm-project/commit/d6ef3d20b4e3768dc30fb229dfa938d8059fffef.diff

LOG: [mlir] Remove VectorToROCDL

Remove the --convert-vector-to-rocdl pass because it is broken. The
reasons include issues such as
https://github.com/llvm/llvm-project/issues/56323; the fact that this
lowering (unlike the code in amdgpu-to-rocdl) does not correctly set
up bounds checks (and thus will cause page faults on reads that might
need to be padded instead); and the fact that fixing these problems
would essentially involve replicating amdgpu-to-rocdl. In addition,
the lowering does not support many aspects of transfer_{read,write},
like supervectors, and may not work correctly in their presence.

We (the team behind the MLIR-based convolution generator at AMD) do
not use this conversion pass, nor are we aware of any other clients.

Migration strategies:
- Use VectorToLLVM
- If buffer ops are particularly needed in your application, use
amdgpu.raw_buffer_{load,store}

A VectorToAMDGPU pass may be introduced in the future.

Reviewed By: ThomasRaoux

Differential Revision: https://reviews.llvm.org/D129308

Added: 
    

Modified: 
    clang/docs/ClangFormattedStatus.rst
    clang/docs/tools/clang-formatted-files.txt
    mlir/include/mlir/Conversion/Passes.h
    mlir/include/mlir/Conversion/Passes.td
    mlir/lib/Conversion/CMakeLists.txt
    mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
    mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
    mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
    utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

Removed: 
    mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
    mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
    mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
    mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir


################################################################################
diff  --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst
index 7e714f67ffe3c..6f62c97f85ba9 100644
--- a/clang/docs/ClangFormattedStatus.rst
+++ b/clang/docs/ClangFormattedStatus.rst
@@ -6829,11 +6829,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:2
      - `1`
      - `0`
      - :good:`100%`
-   * - mlir/include/mlir/Conversion/VectorToROCDL
-     - `1`
-     - `1`
-     - `0`
-     - :good:`100%`
    * - mlir/include/mlir/Conversion/VectorToSCF
      - `1`
      - `1`
@@ -7609,11 +7604,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:2
      - `2`
      - `0`
      - :good:`100%`
-   * - mlir/lib/Conversion/VectorToROCDL
-     - `1`
-     - `1`
-     - `0`
-     - :good:`100%`
    * - mlir/lib/Conversion/VectorToSCF
      - `1`
      - `1`

diff  --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 5e1c6f130a0e4..cc55f9b4d85f4 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -7627,7 +7627,6 @@ mlir/include/mlir/Conversion/TosaToSCF/TosaToSCF.h
 mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h
 mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h
 mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h
-mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
 mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
 mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRV.h
 mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h
@@ -8068,7 +8067,6 @@ mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp
 mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp
 mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
 mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
-mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
 mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
 mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRVPass.cpp
 mlir/lib/Dialect/Traits.cpp

diff  --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index 5ffdc2d3e5af7..9f10e9459f450 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -57,7 +57,6 @@
 #include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
 #include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h"
 

diff  --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 3098e7a73be44..d2a076f21ac2d 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -854,12 +854,12 @@ def ConvertVectorToGPU : Pass<"convert-vector-to-gpu"> {
                 "dialect";
   let constructor = "mlir::createConvertVectorToGPUPass()";
   let dependentDialects = [
-    "memref::MemRefDialect", "gpu::GPUDialect", "AffineDialect", 
+    "memref::MemRefDialect", "gpu::GPUDialect", "AffineDialect",
     "vector::VectorDialect", "nvgpu::NVGPUDialect"
   ];
 
   let options = [
-    Option<"useNvGpu", "use-nvgpu", "bool", /*default=*/"false", 
+    Option<"useNvGpu", "use-nvgpu", "bool", /*default=*/"false",
       "convert to NvGPU ops instead of GPU dialect ops">
   ];
 }
@@ -937,17 +937,6 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> {
   ];
 }
 
-//===----------------------------------------------------------------------===//
-// VectorToROCDL
-//===----------------------------------------------------------------------===//
-
-def ConvertVectorToROCDL : Pass<"convert-vector-to-rocdl", "ModuleOp"> {
-  let summary = "Lower the operations from the vector dialect into the ROCDL "
-                "dialect";
-  let constructor = "mlir::createConvertVectorToROCDLPass()";
-  let dependentDialects = ["ROCDL::ROCDLDialect"];
-}
-
 //===----------------------------------------------------------------------===//
 // VectorToSPIRV
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h b/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
deleted file mode 100644
index bb65899387ce0..0000000000000
--- a/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===- VectorToROCDL.h - Convert Vector to ROCDL dialect ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_
-#define MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_
-
-#include <memory>
-
-namespace mlir {
-class LLVMTypeConverter;
-class ModuleOp;
-template <typename OpT>
-class OperationPass;
-class RewritePatternSet;
-
-/// Collect a set of patterns to convert from the GPU dialect to ROCDL.
-void populateVectorToROCDLConversionPatterns(LLVMTypeConverter &converter,
-                                             RewritePatternSet &patterns);
-
-/// Create a pass to convert vector operations to the ROCDL dialect.
-std::unique_ptr<OperationPass<ModuleOp>> createConvertVectorToROCDLPass();
-
-} // namespace mlir
-#endif // MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_

diff  --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 218c89a445864..34488e7af9af6 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -44,7 +44,6 @@ add_subdirectory(TosaToArith)
 add_subdirectory(TosaToLinalg)
 add_subdirectory(TosaToSCF)
 add_subdirectory(TosaToTensor)
-add_subdirectory(VectorToROCDL)
 add_subdirectory(VectorToLLVM)
 add_subdirectory(VectorToGPU)
 add_subdirectory(VectorToSCF)

diff  --git a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
index 546f7a63cacfd..f78a747f67e71 100644
--- a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
@@ -20,5 +20,4 @@ add_mlir_conversion_library(MLIRGPUToROCDLTransforms
   MLIRMemRefToLLVM
   MLIRROCDLDialect
   MLIRPass
-  MLIRVectorToROCDL
   )

diff  --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 1704f6ede4ef5..57e4a15e0d53b 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -23,7 +23,6 @@
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
@@ -96,7 +95,6 @@ struct LowerGpuOpsToROCDLOpsPass
     populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
                                             *maybeChipset);
     populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
-    populateVectorToROCDLConversionPatterns(converter, llvmPatterns);
     cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
     populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
     populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);

diff  --git a/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt b/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
deleted file mode 100644
index f9be789e64a4c..0000000000000
--- a/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-add_mlir_conversion_library(MLIRVectorToROCDL
-  VectorToROCDL.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToROCDL
-
-  DEPENDS
-  MLIRConversionPassIncGen
-  intrinsics_gen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRFuncToLLVM
-  MLIRROCDLDialect
-  MLIRLLVMCommonConversion
-  MLIRMemRefToLLVM
-  MLIRVectorDialect
-  MLIRTransforms
-  )

diff  --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
deleted file mode 100644
index 1890c0ee16192..0000000000000
--- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-//===- VectorToROCDL.cpp - Vector to ROCDL lowering passes ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to generate ROCDLIR operations for higher-level
-// Vector operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
-
-#include "../PassDetail.h"
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
-#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
-#include "mlir/Conversion/LLVMCommon/Pattern.h"
-#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
-#include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-using namespace mlir;
-using namespace mlir::vector;
-
-static LogicalResult replaceTransferOpWithMubuf(
-    ConversionPatternRewriter &rewriter, ValueRange operands,
-    LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp,
-    Type &vecTy, Value &dwordConfig, Value &vindex, Value &offsetSizeInBytes,
-    Value &glc, Value &slc) {
-  rewriter.replaceOpWithNewOp<ROCDL::MubufLoadOp>(
-      xferOp, vecTy, dwordConfig, vindex, offsetSizeInBytes, glc, slc);
-  return success();
-}
-
-static LogicalResult replaceTransferOpWithMubuf(
-    ConversionPatternRewriter &rewriter, ValueRange operands,
-    LLVMTypeConverter &typeConverter, Location loc, TransferWriteOp xferOp,
-    Type &vecTy, Value &dwordConfig, Value &vindex, Value &offsetSizeInBytes,
-    Value &glc, Value &slc) {
-  auto adaptor = TransferWriteOpAdaptor(operands, xferOp->getAttrDictionary());
-  rewriter.replaceOpWithNewOp<ROCDL::MubufStoreOp>(xferOp, adaptor.getVector(),
-                                                   dwordConfig, vindex,
-                                                   offsetSizeInBytes, glc, slc);
-  return success();
-}
-
-namespace {
-/// Conversion pattern that converts a 1-D vector transfer read/write.
-/// Note that this conversion pass only converts vector x2 or x4 f32
-/// types. For unsupported cases, they will fall back to the vector to
-/// llvm conversion pattern.
-template <typename ConcreteOp>
-class VectorTransferConversion : public ConvertOpToLLVMPattern<ConcreteOp> {
-public:
-  using ConvertOpToLLVMPattern<ConcreteOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(ConcreteOp xferOp, typename ConcreteOp::Adaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    // TODO: support 0-d corner case.
-    if (xferOp.getTransferRank() == 0)
-      return failure();
-
-    if (xferOp.getVectorType().getRank() > 1 ||
-        llvm::size(xferOp.getIndices()) == 0)
-      return failure();
-
-    if (!xferOp.getPermutationMap().isMinorIdentity())
-      return failure();
-
-    // Have it handled in vector->llvm conversion pass.
-    if (xferOp.isDimInBounds(0))
-      return failure();
-
-    auto toLLVMTy = [&](Type t) {
-      return this->getTypeConverter()->convertType(t);
-    };
-    auto vecTy = toLLVMTy(xferOp.getVectorType());
-    unsigned vecWidth = LLVM::getVectorNumElements(vecTy).getFixedValue();
-    Location loc = xferOp->getLoc();
-
-    // The backend result vector scalarization have trouble scalarize
-    // <1 x ty> result, exclude the x1 width from the lowering.
-    if (vecWidth != 2 && vecWidth != 4)
-      return failure();
-
-    // Obtain dataPtr and elementType from the memref.
-    auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
-    if (!memRefType)
-      return failure();
-    // MUBUF instruction operate only on addresspace 0(unified) or 1(global)
-    // In case of 3(LDS): fall back to vector->llvm pass
-    // In case of 5(VGPR): wrong
-    if ((memRefType.getMemorySpaceAsInt() != 0) &&
-        (memRefType.getMemorySpaceAsInt() != 1))
-      return failure();
-
-    // Note that the dataPtr starts at the offset address specified by
-    // indices, so no need to calculate offset size in bytes again in
-    // the MUBUF instruction.
-    Value dataPtr = this->getStridedElementPtr(
-        loc, memRefType, adaptor.getSource(), adaptor.getIndices(), rewriter);
-
-    // 1. Create and fill a <4 x i32> dwordConfig with:
-    //    1st two elements holding the address of dataPtr.
-    //    3rd element: -1.
-    //    4th element: 0x27000.
-    SmallVector<int32_t, 4> constConfigAttr{0, 0, -1, 0x27000};
-    Type i32Ty = rewriter.getIntegerType(32);
-    VectorType i32Vecx4 = VectorType::get(4, i32Ty);
-    Value constConfig = rewriter.create<LLVM::ConstantOp>(
-        loc, toLLVMTy(i32Vecx4),
-        DenseElementsAttr::get(i32Vecx4, ArrayRef<int32_t>(constConfigAttr)));
-
-    // Treat first two element of <4 x i32> as i64, and save the dataPtr
-    // to it.
-    Type i64Ty = rewriter.getIntegerType(64);
-    Value i64x2Ty = rewriter.create<LLVM::BitcastOp>(
-        loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), constConfig);
-    Value dataPtrAsI64 = rewriter.create<LLVM::PtrToIntOp>(
-        loc, toLLVMTy(i64Ty).template cast<Type>(), dataPtr);
-    Value zero = this->createIndexConstant(rewriter, loc, 0);
-    Value dwordConfig = rewriter.create<LLVM::InsertElementOp>(
-        loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), i64x2Ty,
-        dataPtrAsI64, zero);
-    dwordConfig =
-        rewriter.create<LLVM::BitcastOp>(loc, toLLVMTy(i32Vecx4), dwordConfig);
-
-    // 2. Rewrite op as a buffer read or write.
-    Value int1False = rewriter.create<LLVM::ConstantOp>(
-        loc, toLLVMTy(rewriter.getIntegerType(1)),
-        rewriter.getIntegerAttr(rewriter.getIntegerType(1), 0));
-    Value int32Zero = rewriter.create<LLVM::ConstantOp>(
-        loc, toLLVMTy(i32Ty),
-        rewriter.getIntegerAttr(rewriter.getIntegerType(32), 0));
-    return replaceTransferOpWithMubuf(
-        rewriter, adaptor.getOperands(), *this->getTypeConverter(), loc, xferOp,
-        vecTy, dwordConfig, int32Zero, int32Zero, int1False, int1False);
-  }
-};
-} // namespace
-
-void mlir::populateVectorToROCDLConversionPatterns(
-    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
-  patterns.add<VectorTransferConversion<TransferReadOp>,
-               VectorTransferConversion<TransferWriteOp>>(converter);
-}
-
-namespace {
-struct LowerVectorToROCDLPass
-    : public ConvertVectorToROCDLBase<LowerVectorToROCDLPass> {
-  void runOnOperation() override;
-};
-} // namespace
-
-void LowerVectorToROCDLPass::runOnOperation() {
-  LLVMTypeConverter converter(&getContext());
-  RewritePatternSet patterns(&getContext());
-
-  populateVectorToROCDLConversionPatterns(converter, patterns);
-  populateMemRefToLLVMConversionPatterns(converter, patterns);
-  populateFuncToLLVMConversionPatterns(converter, patterns);
-
-  LLVMConversionTarget target(getContext());
-  target.addLegalDialect<ROCDL::ROCDLDialect>();
-
-  if (failed(
-          applyPartialConversion(getOperation(), target, std::move(patterns))))
-    signalPassFailure();
-}
-
-std::unique_ptr<OperationPass<ModuleOp>>
-mlir::createConvertVectorToROCDLPass() {
-  return std::make_unique<LowerVectorToROCDLPass>();
-}

diff  --git a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
deleted file mode 100644
index c10ef3867da3e..0000000000000
--- a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
+++ /dev/null
@@ -1,68 +0,0 @@
-// RUN: mlir-opt %s -convert-vector-to-rocdl | FileCheck %s
-
-gpu.module @test_read{
-func.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
-  %f0 = arith.constant 0.0: f32
-  %f = vector.transfer_read %A[%base], %f0
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    memref<?xf32>, vector<2xf32>
-  return %f: vector<2xf32>
-}
-// CHECK-LABEL: @transfer_readx2
-// CHECK: rocdl.buffer.load {{.*}} vector<2xf32>
-
-func.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
-  %f0 = arith.constant 0.0: f32
-  %f = vector.transfer_read %A[%base], %f0
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    memref<?xf32>, vector<4xf32>
-  return %f: vector<4xf32>
-}
-// CHECK-LABEL: @transfer_readx4
-// CHECK: rocdl.buffer.load {{.*}} vector<4xf32>
-
-func.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
-  %f0 = arith.constant 0.0: f32
-  %f = vector.transfer_read %A[%base], %f0
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    memref<?xf32>, vector<4xf32>
-  return %f: vector<4xf32>
-}
-// CHECK-LABEL: @transfer_read_dwordConfig
-// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}
-// CHECK: [0, 0, -1, 159744]
-// CHECK: %[[i64:.*]] = llvm.ptrtoint %[[gep]]
-// CHECK: llvm.insertelement %[[i64]]
-}
-
-gpu.module @test_write{
-func.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
-  vector.transfer_write %B, %A[%base]
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    vector<2xf32>, memref<?xf32>
-  return
-}
-// CHECK-LABEL: @transfer_writex2
-// CHECK: rocdl.buffer.store {{.*}} vector<2xf32>
-
-func.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
-  vector.transfer_write %B, %A[%base]
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    vector<4xf32>, memref<?xf32>
-  return
-}
-// CHECK-LABEL: @transfer_writex4
-// CHECK: rocdl.buffer.store {{.*}} vector<4xf32>
-
-func.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
-  vector.transfer_write %B, %A[%base]
-      {permutation_map = affine_map<(d0) -> (d0)>} :
-    vector<2xf32>, memref<?xf32>
-  return
-}
-// CHECK-LABEL: @transfer_write_dwordConfig
-// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}
-// CHECK: [0, 0, -1, 159744]
-// CHECK: %[[i64:.*]] = llvm.ptrtoint %[[gep]]
-// CHECK: llvm.insertelement %[[i64]]
-}

diff  --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
index add327486b7d9..8a7d0361d44f2 100644
--- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN:   -convert-scf-to-cf \
 // RUN:   -gpu-kernel-outlining \
-// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \
 // RUN:   -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
@@ -9,22 +9,21 @@
 // RUN:   --entry-point-result=void \
 // RUN: | FileCheck %s
 
+// TODO: swap for vector transfer reads if we ever create a --vector-to-amdgpu
 func.func @vectransferx2(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>) {
   %cst = arith.constant 1 : index
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) {
     %f0 = arith.constant 0.0: f32
-    %base = arith.constant 0 : index
-    %f = vector.transfer_read %arg0[%base], %f0
-        {permutation_map = affine_map<(d0) -> (d0)>} :
-      memref<?xf32>, vector<2xf32>
+    %base = arith.constant 0 : i32
+    %f = amdgpu.raw_buffer_load {boundsCheck = true } %arg0[%base]
+      : memref<?xf32>, i32 -> vector<2xf32>
 
     %c = arith.addf %f, %f : vector<2xf32>
 
-    %base1 = arith.constant 1 : index
-    vector.transfer_write %c, %arg1[%base1]
-        {permutation_map = affine_map<(d0) -> (d0)>} :
-      vector<2xf32>, memref<?xf32>
+    %base1 = arith.constant 1 : i32
+    amdgpu.raw_buffer_store { boundsCheck = false } %c -> %arg1[%base1]
+      : vector<2xf32> -> memref<?xf32>, i32
 
     gpu.terminator
   }
@@ -36,16 +35,14 @@ func.func @vectransferx4(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>) {
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) {
     %f0 = arith.constant 0.0: f32
-    %base = arith.constant 0 : index
-    %f = vector.transfer_read %arg0[%base], %f0
-        {permutation_map = affine_map<(d0) -> (d0)>} :
-      memref<?xf32>, vector<4xf32>
+    %base = arith.constant 0 : i32
+    %f = amdgpu.raw_buffer_load { boundsCheck = false } %arg0[%base]
+      : memref<?xf32>, i32 -> vector<4xf32>
 
     %c = arith.addf %f, %f : vector<4xf32>
 
-    vector.transfer_write %c, %arg1[%base]
-        {permutation_map = affine_map<(d0) -> (d0)>} :
-      vector<4xf32>, memref<?xf32>
+    amdgpu.raw_buffer_store { boundsCheck = false } %c -> %arg1[%base]
+      : vector<4xf32> -> memref<?xf32>, i32
 
     gpu.terminator
   }

diff  --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 9cb214743eea1..23b3cd29c01b4 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2614,7 +2614,6 @@ cc_library(
         ":TosaToTensor",
         ":VectorToGPU",
         ":VectorToLLVM",
-        ":VectorToROCDL",
         ":VectorToSCF",
         ":VectorToSPIRV",
     ],
@@ -3804,30 +3803,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "VectorToROCDL",
-    srcs = [
-        "lib/Conversion/VectorToROCDL/VectorToROCDL.cpp",
-        ":ConversionPassDetail",
-    ],
-    hdrs = ["include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h"],
-    includes = ["include"],
-    deps = [
-        ":ConversionPassIncGen",
-        ":FuncDialect",
-        ":FuncToLLVM",
-        ":GPUDialect",
-        ":IR",
-        ":LLVMCommonConversion",
-        ":LLVMDialect",
-        ":MemRefToLLVM",
-        ":Pass",
-        ":ROCDLDialect",
-        ":Transforms",
-        ":VectorDialect",
-    ],
-)
-
 cc_library(
     name = "VectorToSPIRV",
     srcs = glob([
@@ -3895,7 +3870,6 @@ cc_library(
         ":Transforms",
         ":VectorDialect",
         ":VectorToLLVM",
-        ":VectorToROCDL",
         ":VectorToSCF",
         "//llvm:Support",
     ],
@@ -6348,7 +6322,6 @@ cc_library(
         ":TransformsPassIncGen",
         ":VectorDialect",
         ":VectorToLLVM",
-        ":VectorToROCDL",
         ":VectorToSCF",
         ":VectorToSPIRV",
         ":VectorTransforms",


        


More information about the Mlir-commits mailing list