[Mlir-commits] [mlir] Revert "[mlir][amdgpu] Add tensor load store operations (#170918)" (PR #172671)
Erick Ochoa Lopez
llvmlistbot at llvm.org
Wed Dec 17 06:58:53 PST 2025
https://github.com/amd-eochoalo created https://github.com/llvm/llvm-project/pull/172671
This reverts commit ecbb44464a3a5fad090be8c19632b9046f8eb109, which broke the ROCm integration tests. The change will be relanded in a future commit.
>From af80bc772dd83f217dbb5363419a77351d5a608a Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 17 Dec 2025 09:57:56 -0500
Subject: [PATCH] Revert "[mlir][amdgpu] Add tensor load store operations
(#170918)"
This reverts commit ecbb44464a3a5fad090be8c19632b9046f8eb109.
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 31 ----------
.../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 57 +++----------------
.../Conversion/AMDGPUToROCDL/gfx1250.mlir | 18 ------
3 files changed, 7 insertions(+), 99 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 4865dc13f324b..96f5f5c6f1a3f 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1587,35 +1587,4 @@ def AMDGPU_MakeDmaDescriptorOp : AMDGPU_MakeDescriptorOp<"make_dma_descriptor">
}
-def AMDGPU_TensorLoadToLDSOp :
- AMDGPU_Op<"tensor_load_to_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
- Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
- let summary = "Load tensors from global memory to LDS.";
- let description = [{
- Load tensors of up to five dimensions from global memory to LDS.
-
- This operation was introduced in gfx1250.
- }];
-
- let assemblyFormat = [{
- $desc attr-dict `:` qualified(type($desc))
- }];
-}
-
-def AMDGPU_TensorStoreFromLDSOp :
- AMDGPU_Op<"tensor_store_from_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
- Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
-
- let summary = "Store tensors from LDS to global memory.";
- let description = [{
- Store tensors of up to five dimensions from LDS to global memory.
-
- This operation was introduced in gfx1250.
- }];
-
- let assemblyFormat = [{
- $desc attr-dict `:` qualified(type($desc))
- }];
-}
-
#endif // AMDGPU
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 4f3192570640a..541bb02d79eae 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -3218,6 +3218,11 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
Location loc = op.getLoc();
+ IntegerType i32 = rewriter.getI32Type();
+ [[maybe_unused]] Type v4i32 =
+ this->typeConverter->convertType(VectorType::get(4, i32));
+ assert(v4i32 && "expected type conversion to succeed");
+
SmallVector<Value> consts;
for (int64_t i = 0; i < 8; ++i)
consts.push_back(createI32Constant(rewriter, loc, i));
@@ -3232,32 +3237,6 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
}
};
-template <typename SourceOp, typename TargetOp>
-struct AMDGPUTensorLoadStoreOpLowering
- : public ConvertOpToLLVMPattern<SourceOp> {
- using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
- using Adaptor = typename ConvertOpToLLVMPattern<SourceOp>::OneToNOpAdaptor;
- AMDGPUTensorLoadStoreOpLowering(const LLVMTypeConverter &converter,
- Chipset chipset)
- : ConvertOpToLLVMPattern<SourceOp>(converter), chipset(chipset) {}
- Chipset chipset;
-
- LogicalResult
- matchAndRewrite(SourceOp op, Adaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- if (chipset < kGfx1250)
- return op->emitOpError("is only supported on gfx1250");
-
- ValueRange desc = adaptor.getDesc();
- rewriter.replaceOpWithNewOp<TargetOp>(op, desc[0], desc[1], desc[2],
- desc[3], /*cachePolicy=*/0,
- /*alias_scopes=*/nullptr,
- /*noalias_scopes=*/nullptr,
- /*tbaa=*/nullptr);
- return success();
- }
-};
-
struct ConvertAMDGPUToROCDLPass
: public impl::ConvertAMDGPUToROCDLPassBase<ConvertAMDGPUToROCDLPass> {
using Base::Base;
@@ -3327,24 +3306,6 @@ void mlir::populateAMDGPUTypeAndAttributeConversions(
Type i32 = IntegerType::get(type.getContext(), 32);
return typeConverter.convertType(VectorType::get(4, i32));
});
- typeConverter.addConversion(
- [&](TDMDescriptorType type,
- SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
- Type i32 = IntegerType::get(type.getContext(), 32);
- Type v4i32 = typeConverter.convertType(VectorType::get(4, i32));
- Type v8i32 = typeConverter.convertType(VectorType::get(8, i32));
- llvm::append_values(result, v4i32, v8i32, v4i32, v4i32);
- return success();
- });
-
- auto addUnrealizedCast = [](OpBuilder &builder, TypeRange types,
- ValueRange inputs,
- Location loc) -> SmallVector<Value> {
- auto cast = UnrealizedConversionCastOp::create(builder, loc, types, inputs);
- return cast.getResults();
- };
-
- typeConverter.addTargetMaterialization(addUnrealizedCast);
}
void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
@@ -3375,11 +3336,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
AMDGPUMakeDmaBaseLowering<MakeDmaBaseOp>,
AMDGPUMakeDmaBaseLowering<MakeGatherDmaBaseOp>,
AMDGPULowerDescriptor<MakeDmaDescriptorOp>,
- AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>,
- AMDGPUTensorLoadStoreOpLowering<TensorLoadToLDSOp,
- ROCDL::TensorLoadToLDSOp>,
- AMDGPUTensorLoadStoreOpLowering<TensorStoreFromLDSOp,
- ROCDL::TensorStoreFromLDSOp>>(
- converter, chipset);
+ AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>>(converter,
+ chipset);
patterns.add<AMDGPUSwizzleBitModeLowering>(converter);
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
index e62db9ff571bf..4979e85785970 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
@@ -773,24 +773,6 @@ func.func @make_dma_descriptor_workgroup_mask(%base: !amdgpu.tdm_base<i32>, %wg_
func.return %descriptor : !amdgpu.tdm_descriptor
}
-// CHECK-LABEL: func @tensor_load_to_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_load_to_lds(%desc: !amdgpu.tdm_descriptor) {
- // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
- // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
- amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor
- func.return
-}
-
-// CHECK-LABEL: func @tensor_store_from_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_store_from_lds(%desc: !amdgpu.tdm_descriptor) {
- // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
- // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
- amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor
- func.return
-}
-
// -----
// CHECK-LABEL: func @make_gather_dma_descriptor
More information about the Mlir-commits mailing list