[Mlir-commits] [mlir] 97f4c22 - [mlir][sparse][gpu] unify dnmat and dnvec handle and ops
Kun Wu
llvmlistbot at llvm.org
Fri Jun 9 10:16:59 PDT 2023
Author: Kun Wu
Date: 2023-06-09T17:16:48Z
New Revision: 97f4c22b3ad55763ccd9a1274aba22efc87f3e9e
URL: https://github.com/llvm/llvm-project/commit/97f4c22b3ad55763ccd9a1274aba22efc87f3e9e
DIFF: https://github.com/llvm/llvm-project/commit/97f4c22b3ad55763ccd9a1274aba22efc87f3e9e.diff
LOG: [mlir][sparse][gpu] unify dnmat and dnvec handle and ops
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D152465
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
mlir/test/Dialect/GPU/ops.mlir
mlir/test/Dialect/GPU/sparse-roundtrip.mlir
mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 63f18ebb46a93..d3d31cdb75b48 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -116,17 +116,11 @@ def GPU_SparseEnvHandle :
"sparse environment handle type">,
BuildableType<"mlir::gpu::SparseEnvHandleType::get($_builder.getContext())">;
-def GPU_SparseDnVecHandle :
+def GPU_SparseDnTensorHandle :
DialectType<GPU_Dialect,
- CPred<"llvm::isa<::mlir::gpu::SparseDnVecHandleType>($_self)">,
- "dense vector handle type">,
- BuildableType<"mlir::gpu::SparseDnVecHandleType::get($_builder.getContext())">;
-
-def GPU_SparseDnMatHandle :
- DialectType<GPU_Dialect,
- CPred<"llvm::isa<::mlir::gpu::SparseDnMatHandleType>($_self)">,
- "dense matrix handle type">,
- BuildableType<"mlir::gpu::SparseDnMatHandleType::get($_builder.getContext())">;
+ CPred<"llvm::isa<::mlir::gpu::SparseDnTensorHandleType>($_self)">,
+ "dense tensor handle type">,
+ BuildableType<"mlir::gpu::SparseDnTensorHandleType::get($_builder.getContext())">;
def GPU_SparseSpMatHandle :
DialectType<GPU_Dialect,
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 22b5b56074b6a..e32ea5c38e6e1 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -165,7 +165,7 @@ class MMAMatrixType
void addAsyncDependency(Operation *op, Value token);
// Handle types for sparse.
-enum class SparseHandleKind { Env, DnVec, DnMat, SpMat };
+enum class SparseHandleKind { Env, SpMat, DnTensor };
template <SparseHandleKind K>
class SparseHandleType
@@ -177,8 +177,7 @@ class SparseHandleType
};
using SparseEnvHandleType = SparseHandleType<SparseHandleKind::Env>;
-using SparseDnVecHandleType = SparseHandleType<SparseHandleKind::DnVec>;
-using SparseDnMatHandleType = SparseHandleType<SparseHandleKind::DnMat>;
+using SparseDnTensorHandleType = SparseHandleType<SparseHandleKind::DnTensor>;
using SparseSpMatHandleType = SparseHandleType<SparseHandleKind::SpMat>;
} // namespace gpu
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 3a93ddae2f5ae..280039e1bf18a 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1597,73 +1597,13 @@ def GPU_DestroySparseEnvOp : GPU_Op<
}];
}
-def GPU_CreateDnVecOp : GPU_Op<"create_dn_vec", [GPU_AsyncOpInterface]> {
- let summary = "Create dense vector operation";
+def GPU_CreateDnTensorOp : GPU_Op<"create_dn_tensor", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {
+ let summary = "Create dense tensor operation";
let description = [{
- The `gpu.create_dn_vec` operation initializes a dense vector from
- the given values buffer and size. The buffer must already be copied
- from the host to the device prior to using this operation. The
- operation returns a handle to the dense vector descriptor.
-
- If the `async` keyword is present, the op is executed asynchronously (i.e.
- it does not block until the execution has finished on the device). In
- that case, it returns a !gpu.async.token in addition to the environment.
-
- Example:
-
- ```mlir
- %dvec, %token = gpu.create_dn_vec async [%dep] %env, %mem, %size : memref<?xf64>
- ```
- }];
-
- let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
- GPU_SparseEnvHandle:$env,
- AnyMemRef:$memref,
- Index:$size);
- let results = (outs Res<GPU_SparseDnVecHandle>:$dvec,
- Optional<GPU_AsyncToken>:$asyncToken);
-
- let assemblyFormat = [{
- custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $memref `,` $size attr-dict `:` type($memref)
- }];
-}
-
-def GPU_DestroyDnVecOp : GPU_Op<"destroy_dn_vec", [GPU_AsyncOpInterface]> {
- let summary = "Destroy dense vector operation";
- let description = [{
- The `gpu.destroy_dn_vec` operation releases all resources of a dense
- vector represented by a handle that was previously created by a
- `gpu.create_dn_vec` operation.
-
- If the `async` keyword is present, the op is executed asynchronously (i.e.
- it does not block until the execution has finished on the device). In
- that case, it returns a !gpu.async.token in addition to the environment.
-
- Example:
-
- ```mlir
- %token = gpu.destroy_dn_vec async [%dep] %dvec
- ```
- }];
-
- let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
- Arg<GPU_SparseDnVecHandle>:$dvec);
- let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
-
- let assemblyFormat = [{
- custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $dvec attr-dict
- }];
-}
-
-def GPU_CreateDnMatOp : GPU_Op<"create_dn_mat", [GPU_AsyncOpInterface]> {
- let summary = "Create dense matrix operation";
- let description = [{
- The `gpu.create_dn_mat` operation initializes a dense matrix from
+ The `gpu.create_dn_tensor` operation initializes a dense tensor from
the given values buffer and sizes. The buffer must already be copied
from the host to the device prior to using this operation. The
- operation returns a handle to the dense matrix descriptor.
+ operation returns a handle to the dense tensor descriptor.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
@@ -1672,29 +1612,28 @@ def GPU_CreateDnMatOp : GPU_Op<"create_dn_mat", [GPU_AsyncOpInterface]> {
Example:
```mlir
- %dmat, %token = gpu.create_dn_mat async [%dep] %env, %rows, %cols, %mem : memref<?xf64>
+ %dmat, %token = gpu.create_dn_tensor async [%dep] %env, %mem, %dims : index, index into memref<?xf64>
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
GPU_SparseEnvHandle:$env,
- Index:$rows,
- Index:$cols,
- AnyMemRef:$memref);
- let results = (outs Res<GPU_SparseDnMatHandle>:$dmat, Optional<GPU_AsyncToken>:$asyncToken);
+ AnyMemRef:$memref,
+ Variadic<Index>:$dims);
+ let results = (outs Res<GPU_SparseDnTensorHandle>:$dnTensor, Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $rows `,` $cols `,` $memref attr-dict `:` type($memref)
+ $env `,` $memref `,` $dims attr-dict `:` type($dims) `into` type($memref)
}];
}
-def GPU_DestroyDnMatOp : GPU_Op<"destroy_dn_mat", [GPU_AsyncOpInterface]> {
- let summary = "Destroy dense matrix operation";
+def GPU_DestroyDnTensorOp : GPU_Op<"destroy_dn_tensor", [GPU_AsyncOpInterface]> {
+ let summary = "Destroy dense tensor operation";
let description = [{
- The `gpu.destroy_dn_mat` operation releases all resources of a dense
- matrix represented by a handle that was previously created by a
- `gpu.create_dn_mat` operation.
+ The `gpu.destroy_dn_tensor` operation releases all resources of a dense
+ tensor represented by a handle that was previously created by a
+ `gpu.create_dn_tensor` operation.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
@@ -1703,17 +1642,17 @@ def GPU_DestroyDnMatOp : GPU_Op<"destroy_dn_mat", [GPU_AsyncOpInterface]> {
Example:
```mlir
- %token = gpu.destroy_dn_vec async [%dep] %dmat
+ %token = gpu.destroy_dn_tensor async [%dep] %dnTensor
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
- Arg<GPU_SparseDnMatHandle>:$dmat);
+ Arg<GPU_SparseDnTensorHandle>:$dnTensor);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $dmat attr-dict
+ $dnTensor attr-dict
}];
}
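For reference, the unified op now covers both of the removed ops. A minimal sketch of the new syntax for both ranks (operand names are placeholders; the forms follow the updated tests further down):

```mlir
// 1-D case (formerly gpu.create_dn_vec): a single size operand.
%dnvec, %t1 = gpu.create_dn_tensor async [%t0] %env, %mem1d, %size : index into memref<?xf64>
// 2-D case (formerly gpu.create_dn_mat): two size operands.
%dnmat, %t2 = gpu.create_dn_tensor async [%t1] %env, %mem2d, %rows, %cols : index, index into memref<?x?xf64>
// A single destroy op now handles both ranks.
%t3 = gpu.destroy_dn_tensor async [%t2] %dnvec
%t4 = gpu.destroy_dn_tensor async [%t3] %dnmat
```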
@@ -1945,8 +1884,8 @@ def GPU_SpMVBufferSizeOp : GPU_Op<"spmv_buffer_size", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_SparseSpMatHandle:$spmatA,
- GPU_SparseDnVecHandle:$dnX,
- GPU_SparseDnVecHandle:$dnY,
+ GPU_SparseDnTensorHandle:$dnX,
+ GPU_SparseDnTensorHandle:$dnY,
TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz,
Optional<GPU_AsyncToken>:$asyncToken);
@@ -1998,8 +1937,8 @@ def GPU_SpMVOp : GPU_Op<"spmv", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_SparseSpMatHandle:$spmatA,
- GPU_SparseDnVecHandle:$dnX,
- GPU_SparseDnVecHandle:$dnY,
+ GPU_SparseDnTensorHandle:$dnX,
+ GPU_SparseDnTensorHandle:$dnY,
TypeAttr:$computeType,
AnyMemRef:$buffer);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
@@ -2052,8 +1991,8 @@ def GPU_SpMMBufferSizeOp : GPU_Op<"spmm_buffer_size", [GPU_AsyncOpInterface]> {
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseSpMatHandle:$spmatA,
- GPU_SparseDnMatHandle:$dnmatB,
- GPU_SparseDnMatHandle:$dnmatC,
+ GPU_SparseDnTensorHandle:$dnmatB,
+ GPU_SparseDnTensorHandle:$dnmatC,
TypeAttr:$computeType);
let results = (outs Res<AnyTypeOf<[Index, TupleOf<[Index, Index,
Index]>]>>:$bufferSzs,
@@ -2108,8 +2047,8 @@ def GPU_SpMMOp : GPU_Op<"spmm", [GPU_AsyncOpInterface, AttrSizedOperandSegments]
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseSpMatHandle:$spmatA,
- GPU_SparseDnMatHandle:$dnmatB,
- GPU_SparseDnMatHandle:$dnmatC,
+ GPU_SparseDnTensorHandle:$dnmatB,
+ GPU_SparseDnTensorHandle:$dnmatC,
TypeAttr:$computeType,
Variadic<AnyMemRef>:$buffers);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
@@ -2162,8 +2101,8 @@ def GPU_SDDMMBufferSizeOp : GPU_Op<"sddmm_buffer_size", [GPU_AsyncOpInterface]>
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
- GPU_SparseDnMatHandle:$dnmatA,
- GPU_SparseDnMatHandle:$dnmatB,
+ GPU_SparseDnTensorHandle:$dnmatA,
+ GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz, Optional<GPU_AsyncToken>:$asyncToken);
@@ -2216,8 +2155,8 @@ def GPU_SDDMMOp : GPU_Op<"sddmm", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
- GPU_SparseDnMatHandle:$dnmatA,
- GPU_SparseDnMatHandle:$dnmatB,
+ GPU_SparseDnTensorHandle:$dnmatA,
+ GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
TypeAttr:$computeType,
AnyMemRef:$buffer);
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 272a9074f804e..689a705350c75 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -548,51 +548,31 @@ class ConvertDestroySparseEnvOpToGpuRuntimeCallPattern
ConversionPatternRewriter &rewriter) const override;
};
-class ConvertCreateDnVecOpToGpuRuntimeCallPattern
- : public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnVecOp> {
+class ConvertCreateDnTensorOpToGpuRuntimeCallPattern
+ : public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnTensorOp> {
public:
- ConvertCreateDnVecOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
- : ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnVecOp>(typeConverter) {}
-
-private:
- LogicalResult
- matchAndRewrite(gpu::CreateDnVecOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override;
-};
-
-class ConvertDestroyDnVecOpToGpuRuntimeCallPattern
- : public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnVecOp> {
-public:
- ConvertDestroyDnVecOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
- : ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnVecOp>(typeConverter) {}
-
-private:
- LogicalResult
- matchAndRewrite(gpu::DestroyDnVecOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override;
-};
-
-class ConvertCreateDnMatOpToGpuRuntimeCallPattern
- : public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnMatOp> {
-public:
- ConvertCreateDnMatOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
- : ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnMatOp>(typeConverter) {}
+ ConvertCreateDnTensorOpToGpuRuntimeCallPattern(
+ LLVMTypeConverter &typeConverter)
+ : ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnTensorOp>(typeConverter) {
+ }
private:
LogicalResult
- matchAndRewrite(gpu::CreateDnMatOp op, OpAdaptor adaptor,
+ matchAndRewrite(gpu::CreateDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
-class ConvertDestroyDnMatOpToGpuRuntimeCallPattern
- : public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnMatOp> {
+class ConvertDestroyDnTensorOpToGpuRuntimeCallPattern
+ : public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnTensorOp> {
public:
- ConvertDestroyDnMatOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
- : ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnMatOp>(typeConverter) {}
+ ConvertDestroyDnTensorOpToGpuRuntimeCallPattern(
+ LLVMTypeConverter &typeConverter)
+ : ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnTensorOp>(
+ typeConverter) {}
private:
LogicalResult
- matchAndRewrite(gpu::DestroyDnMatOp op, OpAdaptor adaptor,
+ matchAndRewrite(gpu::DestroyDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
@@ -1474,102 +1454,90 @@ LogicalResult ConvertDestroySparseEnvOpToGpuRuntimeCallPattern::matchAndRewrite(
return success();
}
-LogicalResult ConvertCreateDnVecOpToGpuRuntimeCallPattern::matchAndRewrite(
- gpu::CreateDnVecOp op, OpAdaptor adaptor,
+LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
+ gpu::CreateDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
- Value pVec =
+ Value pTensor =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
if (!getTypeConverter()->useOpaquePointers())
- pVec = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pVec);
+ pTensor = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pTensor);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
- auto handle =
- createDnVecCallBuilder
- .create(loc, rewriter, {adaptor.getSize(), pVec, dtp, stream})
- .getResult();
- rewriter.replaceOp(op, {handle, stream});
- return success();
-}
-LogicalResult ConvertDestroyDnVecOpToGpuRuntimeCallPattern::matchAndRewrite(
- gpu::DestroyDnVecOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const {
- if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
- failed(isAsyncWithOneDependency(rewriter, op)))
- return failure();
- Location loc = op.getLoc();
- auto stream = adaptor.getAsyncDependencies().front();
- destroyDnVecCallBuilder.create(loc, rewriter, {adaptor.getDvec(), stream});
- rewriter.replaceOp(op, {stream});
- return success();
-}
+ SmallVector<Value, 4> dims;
+ for (Value dim : adaptor.getDims()) {
+ dims.push_back(dim);
+ }
-LogicalResult ConvertCreateDnMatOpToGpuRuntimeCallPattern::matchAndRewrite(
- gpu::CreateDnMatOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const {
- if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
- failed(isAsyncWithOneDependency(rewriter, op)))
- return failure();
- Location loc = op.getLoc();
- auto stream = adaptor.getAsyncDependencies().front();
- Value pMat =
- MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pMat = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pMat);
- Type dType = op.getMemref().getType().getElementType();
- auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
+ Value handle;
  // TODO: For now, we track the use of the handle and lower it to cusparse /
  // cusparseLt accordingly. If both cusparse and cusparseLt are used within
  // one block, two separate creation ops are required for correctness. In the
  // future, we may add support for using one handle in the sparse tensor /
  // GPU dialects with both cusparse and cusparseLt. Use the cusparseLt create
  // call if the dnmat is used with a spmat with 2:4 sparsity.
- Value handle;
- if (isSpMMCusparseLtOp(op.getDmat())) {
- auto envHandle = adaptor.getEnv();
- AssertSparseLTDnMatHandleSizeCallBuilder.create(loc, rewriter, {});
- auto handleSz = rewriter.create<LLVM::ConstantOp>(
- loc, getIndexType(), rewriter.getIndexAttr(11032));
- handle = rewriter.create<LLVM::AllocaOp>(loc, llvmInt8PointerType,
- llvmInt8Type, handleSz);
- handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
-
- createLtDnMatCallBuilder
- .create(loc, rewriter,
- {handle, envHandle, adaptor.getRows(), adaptor.getCols(), pMat,
- dtp, stream})
- .getResult();
+ if (dims.size() == 2) {
+ if (isSpMMCusparseLtOp(op.getDnTensor())) {
+ auto envHandle = adaptor.getEnv();
+ AssertSparseLTDnMatHandleSizeCallBuilder.create(loc, rewriter, {});
+ auto handleSz = rewriter.create<LLVM::ConstantOp>(
+ loc, getIndexType(), rewriter.getIndexAttr(11032));
+ handle = rewriter.create<LLVM::AllocaOp>(loc, llvmInt8PointerType,
+ llvmInt8Type, handleSz);
+ handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
+
+ createLtDnMatCallBuilder
+ .create(loc, rewriter,
+ {handle, envHandle, dims[0], dims[1], pTensor, dtp, stream})
+ .getResult();
+ } else {
+ handle =
+ createDnMatCallBuilder
+ .create(loc, rewriter, {dims[0], dims[1], pTensor, dtp, stream})
+ .getResult();
+ }
} else {
- handle =
- createDnMatCallBuilder
- .create(loc, rewriter,
- {adaptor.getRows(), adaptor.getCols(), pMat, dtp, stream})
- .getResult();
+ assert(dims.size() == 1 && "Only 1D and 2D tensors are supported");
+ handle = createDnVecCallBuilder
+ .create(loc, rewriter, {dims[0], pTensor, dtp, stream})
+ .getResult();
}
rewriter.replaceOp(op, {handle, stream});
return success();
}
-LogicalResult ConvertDestroyDnMatOpToGpuRuntimeCallPattern::matchAndRewrite(
- gpu::DestroyDnMatOp op, OpAdaptor adaptor,
+LogicalResult ConvertDestroyDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
+ gpu::DestroyDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
- // Use the cusparseLt destroy call if the dnmat is used with spmat with
- // 2:4 sparsity
- if (isSpMMCusparseLtOp(op.getDmat())) {
- destroyCuSparseLtDnMatBuilder.create(loc, rewriter,
- {adaptor.getDmat(), stream});
+ auto definingOp = op.getDnTensor().getDefiningOp<gpu::CreateDnTensorOp>();
+ SmallVector<Value, 4> dims;
+ for (Value dim : definingOp.getDims()) {
+ dims.push_back(dim);
+ }
+ if (dims.size() == 2) {
+    // Use the cusparseLt destroy call if the dnmat is used with a spmat
+    // with 2:4 sparsity.
+ if (isSpMMCusparseLtOp(op.getDnTensor())) {
+ destroyCuSparseLtDnMatBuilder.create(loc, rewriter,
+ {adaptor.getDnTensor(), stream});
+ } else {
+ destroyDnMatCallBuilder.create(loc, rewriter,
+ {adaptor.getDnTensor(), stream});
+ }
} else {
- destroyDnMatCallBuilder.create(loc, rewriter, {adaptor.getDmat(), stream});
+ assert(dims.size() == 1 && "Only 1D and 2D tensors are supported");
+ destroyDnVecCallBuilder.create(loc, rewriter,
+ {adaptor.getDnTensor(), stream});
}
rewriter.replaceOp(op, {stream});
return success();
@@ -1914,8 +1882,7 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
StringRef gpuBinaryAnnotation,
bool kernelBarePtrCallConv) {
addOpaquePointerConversion<gpu::AsyncTokenType>(converter);
- addOpaquePointerConversion<gpu::SparseDnVecHandleType>(converter);
- addOpaquePointerConversion<gpu::SparseDnMatHandleType>(converter);
+ addOpaquePointerConversion<gpu::SparseDnTensorHandleType>(converter);
addOpaquePointerConversion<gpu::SparseSpMatHandleType>(converter);
addOpaquePointerConversion<gpu::SparseEnvHandleType>(converter);
@@ -1931,10 +1898,8 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
ConvertAsyncYieldToGpuRuntimeCallPattern,
ConvertCreateSparseEnvOpToGpuRuntimeCallPattern,
ConvertDestroySparseEnvOpToGpuRuntimeCallPattern,
- ConvertCreateDnVecOpToGpuRuntimeCallPattern,
- ConvertDestroyDnVecOpToGpuRuntimeCallPattern,
- ConvertCreateDnMatOpToGpuRuntimeCallPattern,
- ConvertDestroyDnMatOpToGpuRuntimeCallPattern,
+ ConvertCreateDnTensorOpToGpuRuntimeCallPattern,
+ ConvertDestroyDnTensorOpToGpuRuntimeCallPattern,
ConvertCreateCooOpToGpuRuntimeCallPattern,
ConvertCreateCooAoSOpToGpuRuntimeCallPattern,
ConvertCreateCsrOpToGpuRuntimeCallPattern,
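Taken together, the create/destroy patterns above dispatch on the number of dims: two dims select the dense-matrix runtime path (or its cusparseLt variant when the handle feeds a 2:4 spmat), one dim selects the dense-vector path. A hedged sketch of input IR that exercises both branches, adapted from lower-sparse-to-gpu-runtime-calls.mlir (%sz is reused for all sizes purely for brevity):

```mlir
func.func @both_ranks(%sz: index) {
  %t0 = gpu.wait async
  %mem, %t1 = gpu.alloc async [%t0] (%sz) : memref<?xf64>
  %env, %t2 = gpu.create_sparse_env async [%t1]
  // dims.size() == 1: lowered via the dense-vector call builder.
  %dv, %t3 = gpu.create_dn_tensor async [%t2] %env, %mem, %sz : index into memref<?xf64>
  // dims.size() == 2: lowered via the dense-matrix call builder.
  %dm, %t4 = gpu.create_dn_tensor async [%t3] %env, %mem, %sz, %sz : index, index into memref<?xf64>
  %t5 = gpu.destroy_dn_tensor async [%t4] %dv
  %t6 = gpu.destroy_dn_tensor async [%t5] %dm
  %t7 = gpu.destroy_sparse_env async [%t6] %env
  gpu.wait [%t7]
  return
}
```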
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 7b9640135257d..06ff669e9a71a 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -147,8 +147,7 @@ void GPUDialect::initialize() {
addTypes<AsyncTokenType>();
addTypes<MMAMatrixType>();
addTypes<SparseEnvHandleType>();
- addTypes<SparseDnVecHandleType>();
- addTypes<SparseDnMatHandleType>();
+ addTypes<SparseDnTensorHandleType>();
addTypes<SparseSpMatHandleType>();
addOperations<
#define GET_OP_LIST
@@ -165,10 +164,8 @@ static std::string getSparseHandleKeyword(SparseHandleKind kind) {
switch (kind) {
case SparseHandleKind::Env:
return "sparse.env_handle";
- case SparseHandleKind::DnVec:
- return "sparse.dnvec_handle";
- case SparseHandleKind::DnMat:
- return "sparse.dnmat_handle";
+ case SparseHandleKind::DnTensor:
+ return "sparse.dntensor_handle";
case SparseHandleKind::SpMat:
return "sparse.spmat_handle";
}
@@ -221,10 +218,8 @@ Type GPUDialect::parseType(DialectAsmParser &parser) const {
if (keyword == getSparseHandleKeyword(SparseHandleKind::Env))
return SparseEnvHandleType::get(context);
- if (keyword == getSparseHandleKeyword(SparseHandleKind::DnVec))
- return SparseDnVecHandleType::get(context);
- if (keyword == getSparseHandleKeyword(SparseHandleKind::DnMat))
- return SparseDnMatHandleType::get(context);
+ if (keyword == getSparseHandleKeyword(SparseHandleKind::DnTensor))
+ return SparseDnTensorHandleType::get(context);
if (keyword == getSparseHandleKeyword(SparseHandleKind::SpMat))
return SparseSpMatHandleType::get(context);
@@ -238,10 +233,9 @@ void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {
.Case<AsyncTokenType>([&](Type) { os << "async.token"; })
.Case<SparseEnvHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::Env); })
- .Case<SparseDnVecHandleType>(
- [&](Type) { os << getSparseHandleKeyword(SparseHandleKind::DnVec); })
- .Case<SparseDnMatHandleType>(
- [&](Type) { os << getSparseHandleKeyword(SparseHandleKind::DnMat); })
+ .Case<SparseDnTensorHandleType>([&](Type) {
+ os << getSparseHandleKeyword(SparseHandleKind::DnTensor);
+ })
.Case<SparseSpMatHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::SpMat); })
.Case<MMAMatrixType>([&](MMAMatrixType fragTy) {
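With the keyword change above, the handle type round-trips as in this small sketch (the private function is a hypothetical placeholder, used only to spell out the type):

```mlir
// !gpu.sparse.dntensor_handle replaces both !gpu.sparse.dnvec_handle
// and !gpu.sparse.dnmat_handle.
func.func private @use_handle(!gpu.sparse.dntensor_handle)
```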
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index 9b9f179fb5f7f..e4e55574bbb68 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -450,7 +450,7 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
// Create sparse environment and sparse matrix/dense vector handles.
Type indexTp = rewriter.getIndexType();
Type envHandleTp = rewriter.getType<gpu::SparseEnvHandleType>();
- Type dnVecHandleTp = rewriter.getType<gpu::SparseDnVecHandleType>();
+ Type dnTensorHandleTp = rewriter.getType<gpu::SparseDnTensorHandleType>();
Type spmatHandleTp = rewriter.getType<gpu::SparseSpMatHandleType>();
Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
Value token = genFirstWait(rewriter, loc);
@@ -463,12 +463,12 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
rowA, colA, valA, isCOO, enableRT);
Value spMatA = spGenA->getResult(0);
token = spGenA->getResult(1);
- auto dvecX = rewriter.create<gpu::CreateDnVecOp>(loc, dnVecHandleTp, tokenTp,
- token, handle, vecX, szX);
+ auto dvecX = rewriter.create<gpu::CreateDnTensorOp>(
+ loc, dnTensorHandleTp, tokenTp, token, handle, vecX, szX);
Value dnX = dvecX.getResult(0);
token = dvecX.getAsyncToken();
- auto dvecY = rewriter.create<gpu::CreateDnVecOp>(loc, dnVecHandleTp, tokenTp,
- token, handle, vecY, szY);
+ auto dvecY = rewriter.create<gpu::CreateDnTensorOp>(
+ loc, dnTensorHandleTp, tokenTp, token, handle, vecY, szY);
Value dnY = dvecY.getResult(0);
token = dvecY.getAsyncToken();
@@ -493,9 +493,9 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
// Copy data back to host and free all the resources.
token = rewriter.create<gpu::DestroySpMatOp>(loc, tokenTp, token, spMatA)
.getAsyncToken();
- token = rewriter.create<gpu::DestroyDnVecOp>(loc, tokenTp, token, dnX)
+ token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnX)
.getAsyncToken();
- token = rewriter.create<gpu::DestroyDnVecOp>(loc, tokenTp, token, dnY)
+ token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnY)
.getAsyncToken();
token = rewriter.create<gpu::DestroySparseEnvOp>(loc, tokenTp, token, handle)
.getAsyncToken();
@@ -557,7 +557,7 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
// Create sparse environment and sparse matrix/dense matrix handles.
Type indexTp = rewriter.getIndexType();
Type envHandleTp = rewriter.getType<gpu::SparseEnvHandleType>();
- Type dnMatHandleTp = rewriter.getType<gpu::SparseDnMatHandleType>();
+ Type dnTensorHandleTp = rewriter.getType<gpu::SparseDnTensorHandleType>();
Type spMatHandleTp = rewriter.getType<gpu::SparseSpMatHandleType>();
Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
Value token = genFirstWait(rewriter, loc);
@@ -570,12 +570,14 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
rowA, colA, valA, isCOO, enableRT);
Value spMatA = spGenA->getResult(0);
token = spGenA->getResult(1);
- auto dmatB = rewriter.create<gpu::CreateDnMatOp>(
- loc, dnMatHandleTp, tokenTp, token, handle, szk, szn, matB);
+ auto dmatB = rewriter.create<gpu::CreateDnTensorOp>(
+ loc, dnTensorHandleTp, tokenTp, token, handle, matB,
+ SmallVector<Value>{szk, szn});
Value dnB = dmatB.getResult(0);
token = dmatB.getAsyncToken();
- auto dmatC = rewriter.create<gpu::CreateDnMatOp>(
- loc, dnMatHandleTp, tokenTp, token, handle, szm, szn, matC);
+ auto dmatC = rewriter.create<gpu::CreateDnTensorOp>(
+ loc, dnTensorHandleTp, tokenTp, token, handle, matC,
+ SmallVector<Value>{szm, szn});
Value dnC = dmatC.getResult(0);
token = dmatC.getAsyncToken();
@@ -602,9 +604,9 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
// Copy data back to host and free all the resources.
token = rewriter.create<gpu::DestroySpMatOp>(loc, tokenTp, token, spMatA)
.getAsyncToken();
- token = rewriter.create<gpu::DestroyDnMatOp>(loc, tokenTp, token, dnB)
+ token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnB)
.getAsyncToken();
- token = rewriter.create<gpu::DestroyDnMatOp>(loc, tokenTp, token, dnC)
+ token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnC)
.getAsyncToken();
token = rewriter.create<gpu::DestroySparseEnvOp>(loc, tokenTp, token, handle)
.getAsyncToken();
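At the IR level, the SpMM rewrite now emits something like the following sketch (mirroring the CHECK lines in gpu_matmul_lib.mlir below; %handle, %szm/%szn/%szk, %matB/%matC, and the tokens are placeholders):

```mlir
// Dense-matrix handles for B and C go through the unified op, passing
// both sizes as variadic dims.
%dnB, %tB = gpu.create_dn_tensor async [%t] %handle, %matB, %szk, %szn : index, index into memref<?x?xf64>
%dnC, %tC = gpu.create_dn_tensor async [%tB] %handle, %matC, %szm, %szn : index, index into memref<?x?xf64>
// ... buffer-size query and gpu.spmm happen here, then teardown:
%tD = gpu.destroy_dn_tensor async [%tC] %dnB
%tE = gpu.destroy_dn_tensor async [%tD] %dnC
```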
diff --git a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
index fbb9491407abd..8fa28cfeae3b0 100644
--- a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf16>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_2to4_spmat async [%token3] %env, %arg0, %arg0, %mem1: memref<?xf16>
- %dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf16>
+ %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf16>
%bufferSzs, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : tuple<index,index,index> into f16
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2, %mem2, %mem2 : memref<?xf16>,memref<?xf16>,memref<?xf16> into f16
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_mat async [%token8] %dnmat
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index fcee1ac9175a3..6b7d2b9b87fe7 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnvec, %token5 = gpu.create_dn_vec async [%token4] %env, %mem2, %arg0 : memref<?xf64>
+ %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0 : index into memref<?xf64>
%bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
%token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_vec async [%token8] %dnvec
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -52,11 +52,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
+ %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : index into f64
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_mat async [%token8] %dnmat
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -82,11 +82,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
+ %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
%token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_mat async [%token8] %dnmat
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 915d923ea1246..3f5dbb15660c2 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -332,14 +332,14 @@ module attributes {gpu.container_module} {
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: gpu.create_csr async
%spmat2, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- // CHECK: gpu.create_dn_vec async
- %dnvec, %token6 = gpu.create_dn_vec async [%token5] %env, %mem2, %arg0 : memref<?xf64>
+ // CHECK: gpu.create_dn_tensor async
+ %dnvec, %token6 = gpu.create_dn_tensor async [%token5] %env, %mem2, %arg0 : index into memref<?xf64>
// CHECK: gpu.spmv_buffer_size async
%bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec into f64
// CHECK: gpu.spmv async
%token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
- // CHECK: gpu.create_dn_mat async
- %dnmat, %token9 = gpu.create_dn_mat async [%token8] %env, %arg0, %arg0, %mem2 : memref<?xf64>
+ // CHECK: gpu.create_dn_tensor async
+ %dnmat, %token9 = gpu.create_dn_tensor async [%token8] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
// CHECK: gpu.spmm_buffer_size async
%bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat : index into f64
// CHECK: gpu.spmm async
@@ -348,12 +348,12 @@ module attributes {gpu.container_module} {
%bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat into f64
// CHECK: gpu.sddmm async
%token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
- // CHECK: gpu.destroy_dn_mat async
- %token14 = gpu.destroy_dn_mat async [%token13] %dnmat
+ // CHECK: gpu.destroy_dn_tensor async
+ %token14 = gpu.destroy_dn_tensor async [%token13] %dnmat
// CHECK: gpu.destroy_sp_mat async
%token15 = gpu.destroy_sp_mat async [%token14] %spmat
- // CHECK: gpu.destroy_dn_vec async
- %token16 = gpu.destroy_dn_vec async [%token15] %dnvec
+ // CHECK: gpu.destroy_dn_tensor async
+ %token16 = gpu.destroy_dn_tensor async [%token15] %dnvec
// CHECK: gpu.destroy_sparse_env async
%token17 = gpu.destroy_sparse_env async [%token16] %env
// CHECK: gpu.wait
diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
index c531ee28eb7bd..6766c982df789 100644
--- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
+++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
@@ -8,11 +8,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_vec async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
- // CHECK: %{{.*}} = gpu.destroy_dn_vec async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnvec, %token5 = gpu.create_dn_vec async [%token4] %env, %mem2, %arg0 : memref<?xf64>
+ %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0 : index into memref<?xf64>
%bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
%token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_vec async [%token8] %dnvec
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -38,11 +38,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.spmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.spmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
- // CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -52,11 +52,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
+ %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : index into f64
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_mat async [%token8] %dnmat
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -68,11 +68,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
- // CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -82,11 +82,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
+ %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
%token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
- %token9 = gpu.destroy_dn_mat async [%token8] %dnmat
+ %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
index b51cb769fdb07..c8b7e4835f86f 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir
@@ -47,14 +47,14 @@
// CHECK: %[[VAL_41:.*]] = gpu.wait async
// CHECK: %[[VAL_42:.*]], %[[VAL_43:.*]] = gpu.create_sparse_env async {{\[}}%[[VAL_41]]]
// CHECK: %[[VAL_44:.*]], %[[VAL_45:.*]] = gpu.create_csr async {{\[}}%[[VAL_43]]] %[[VAL_6]], %[[VAL_7]], %[[VAL_5]], %[[VAL_14]], %[[VAL_19]], %[[VAL_24]] : memref<?xindex>, memref<?xindex>, memref<?xf64>
-// CHECK: %[[VAL_46:.*]], %[[VAL_47:.*]] = gpu.create_dn_mat async {{\[}}%[[VAL_45]]] %[[VAL_42]], %[[VAL_7]], %[[VAL_8]], %[[VAL_31]] : memref<?x?xf64>
-// CHECK: %[[VAL_48:.*]], %[[VAL_49:.*]] = gpu.create_dn_mat async {{\[}}%[[VAL_47]]] %[[VAL_42]], %[[VAL_6]], %[[VAL_8]], %[[VAL_38]] : memref<?x?xf64>
+// CHECK: %[[VAL_46:.*]], %[[VAL_47:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_45]]] %[[VAL_42]], %[[VAL_31]], %[[VAL_7]], %[[VAL_8]] : index, index into memref<?x?xf64>
+// CHECK: %[[VAL_48:.*]], %[[VAL_49:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_47]]] %[[VAL_42]], %[[VAL_38]], %[[VAL_6]], %[[VAL_8]] : index, index into memref<?x?xf64>
// CHECK: %[[VAL_50:.*]], %[[VAL_51:.*]] = gpu.spmm_buffer_size async {{\[}}%[[VAL_49]]] %[[VAL_42]], %[[VAL_44]], %[[VAL_46]], %[[VAL_48]] : index
// CHECK: %[[VAL_52:.*]], %[[VAL_53:.*]] = gpu.alloc async {{\[}}%[[VAL_51]]] (%[[VAL_50]]) : memref<?xi8>
// CHECK: %[[VAL_54:.*]] = gpu.spmm async {{\[}}%[[VAL_53]]] %[[VAL_42]], %[[VAL_44]], %[[VAL_46]], %[[VAL_48]], %[[VAL_52]] : memref<?xi8>
// CHECK: %[[VAL_55:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_54]]] %[[VAL_44]]
-// CHECK: %[[VAL_56:.*]] = gpu.destroy_dn_mat async {{\[}}%[[VAL_55]]] %[[VAL_46]]
-// CHECK: %[[VAL_57:.*]] = gpu.destroy_dn_mat async {{\[}}%[[VAL_56]]] %[[VAL_48]]
+// CHECK: %[[VAL_56:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_55]]] %[[VAL_46]]
+// CHECK: %[[VAL_57:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_56]]] %[[VAL_48]]
// CHECK: %[[VAL_58:.*]] = gpu.destroy_sparse_env async {{\[}}%[[VAL_57]]] %[[VAL_42]]
// CHECK: %[[VAL_59:.*]] = gpu.dealloc async {{\[}}%[[VAL_58]]] %[[VAL_14]] : memref<?xindex>
// CHECK: %[[VAL_60:.*]] = gpu.dealloc async {{\[}}%[[VAL_59]]] %[[VAL_19]] : memref<?xindex>
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
index 6fbe7114f2361..4d267fb68c79b 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir
@@ -45,14 +45,14 @@ module {
// CHECK: %[[VAL_38:.*]] = gpu.wait async
// CHECK: %[[VAL_39:.*]], %[[VAL_40:.*]] = gpu.create_sparse_env async {{\[}}%[[VAL_38]]]
// CHECK: %[[VAL_41:.*]], %[[VAL_42:.*]] = gpu.create_coo async {{\[}}%[[VAL_40]]] %[[VAL_6]], %[[VAL_7]], %[[VAL_5]], %[[VAL_13]], %[[VAL_18]], %[[VAL_23]] : memref<?xindex>, memref<?xindex>, memref<?xf64>
-// CHECK: %[[VAL_43:.*]], %[[VAL_44:.*]] = gpu.create_dn_vec async {{\[}}%[[VAL_42]]] %[[VAL_39:.*]], %[[VAL_29]], %[[VAL_7]] : memref<?xf64>
-// CHECK: %[[VAL_45:.*]], %[[VAL_46:.*]] = gpu.create_dn_vec async {{\[}}%[[VAL_44]]] %[[VAL_39:.*]], %[[VAL_35]], %[[VAL_6]] : memref<?xf64>
+// CHECK: %[[VAL_43:.*]], %[[VAL_44:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_42]]] %[[VAL_39:.*]], %[[VAL_29]], %[[VAL_7]] : index into memref<?xf64>
+// CHECK: %[[VAL_45:.*]], %[[VAL_46:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_44]]] %[[VAL_39:.*]], %[[VAL_35]], %[[VAL_6]] : index into memref<?xf64>
// CHECK: %[[VAL_47:.*]], %[[VAL_48:.*]] = gpu.spmv_buffer_size async {{\[}}%[[VAL_46]]] %[[VAL_39]], %[[VAL_41]], %[[VAL_43]], %[[VAL_45]]
// CHECK: %[[VAL_49:.*]], %[[VAL_50:.*]] = gpu.alloc async {{\[}}%[[VAL_48]]] (%[[VAL_47]]) : memref<?xi8>
// CHECK: %[[VAL_51:.*]] = gpu.spmv async {{\[}}%[[VAL_50]]] %[[VAL_39]], %[[VAL_41]], %[[VAL_43]], %[[VAL_45]], %[[VAL_49]] : memref<?xi8>
// CHECK: %[[VAL_52:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_51]]] %[[VAL_41]]
-// CHECK: %[[VAL_53:.*]] = gpu.destroy_dn_vec async {{\[}}%[[VAL_52]]] %[[VAL_43]]
-// CHECK: %[[VAL_54:.*]] = gpu.destroy_dn_vec async {{\[}}%[[VAL_53]]] %[[VAL_45]]
+// CHECK: %[[VAL_53:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_52]]] %[[VAL_43]]
+// CHECK: %[[VAL_54:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_53]]] %[[VAL_45]]
// CHECK: %[[VAL_55:.*]] = gpu.destroy_sparse_env async {{\[}}%[[VAL_54]]] %[[VAL_39]]
// CHECK: %[[VAL_56:.*]] = gpu.dealloc async {{\[}}%[[VAL_55]]] %[[VAL_13]] : memref<?xindex>
// CHECK: %[[VAL_57:.*]] = gpu.dealloc async {{\[}}%[[VAL_56]]] %[[VAL_18]] : memref<?xindex>