[Mlir-commits] [mlir] [MLIR][GPUCommon] Remove typed pointer support (PR #70735)
Christian Ulmann
llvmlistbot at llvm.org
Mon Oct 30 15:14:36 PDT 2023
https://github.com/Dinistro created https://github.com/llvm/llvm-project/pull/70735
This commit removes GPUCommon's lowering support for typed pointers. Typed pointers have been deprecated for a while, and they are planned to be removed from the LLVM dialect soon.
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
From fce83da0a429653c1b58a43554091ca037096c1c Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christian.ulmann at nextsilicon.com>
Date: Mon, 30 Oct 2023 22:07:18 +0000
Subject: [PATCH] [MLIR][GPUCommon] Remove typed pointer support
This commit removes GPUCommon's lowering support for typed pointers.
Typed pointers have been deprecated for a while, and they are planned
to be removed from the LLVM dialect soon.
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
---
mlir/include/mlir/Conversion/Passes.td | 5 +-
.../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 2 +-
.../GPUCommon/GPUToLLVMConversion.cpp | 69 ++--------------
...ower-2to4-sparse-to-gpu-runtime-calls.mlir | 2 +-
.../lower-alloc-to-gpu-runtime-calls.mlir | 2 +-
...ower-launch-func-to-gpu-runtime-calls.mlir | 4 +-
.../lower-memcpy-to-gpu-runtime-calls.mlir | 2 +-
.../lower-memset-to-gpu-runtime-calls.mlir | 2 +-
.../lower-sparse-to-gpu-runtime-calls.mlir | 2 +-
.../lower-wait-to-gpu-runtime-calls.mlir | 2 +-
.../Conversion/GPUCommon/transfer_write.mlir | 2 +-
.../Conversion/GPUCommon/typed-pointers.mlir | 82 -------------------
12 files changed, 16 insertions(+), 160 deletions(-)
delete mode 100644 mlir/test/Conversion/GPUCommon/typed-pointers.mlir
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index cf6e545749ffc64..ffad25ea5993a8a 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -460,10 +460,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
/*default=*/"gpu::getDefaultGpuBinaryAnnotation()",
"Annotation attribute string for GPU binary"
- >,
- Option<"useOpaquePointers", "use-opaque-pointers", "bool",
- /*default=*/"true", "Generate LLVM IR using opaque pointers "
- "instead of typed pointers">,
+ >
];
let dependentDialects = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 447e3c9a59e5c0a..bbed1ea5cf62204 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -210,7 +210,7 @@ class GEPIndicesAdaptor {
/// string (operations inserted at the builder insertion point).
Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
StringRef value, Linkage linkage,
- bool useOpaquePointers);
+ bool useOpaquePointers = true);
/// LLVM requires some operations to be inside of a Module operation. This
/// function confirms that the Operation has the desired properties.
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 12bd02050be036c..8b454e2af45b2e3 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -588,7 +588,6 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp)
void GpuToLLVMConversionPass::runOnOperation() {
LowerToLLVMOptions options(&getContext());
- options.useOpaquePointers = useOpaquePointers;
options.useBarePtrCallConv = hostBarePtrCallConv;
LLVMTypeConverter converter(&getContext(), options);
@@ -835,8 +834,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
- Type elementPtrType = this->getElementPtrType(memRefType);
-
auto nullPtr = rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPointerType);
Value stream = adaptor.getAsyncDependencies().empty()
? nullPtr
@@ -848,9 +845,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value allocatedPtr =
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream, isHostShared})
.getResult();
- if (!getTypeConverter()->useOpaquePointers())
- allocatedPtr =
- rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, allocatedPtr);
// No alignment.
Value alignedPtr = allocatedPtr;
@@ -880,8 +874,6 @@ LogicalResult ConvertDeallocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pointer =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pointer = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pointer);
Value stream = adaptor.getAsyncDependencies().front();
deallocCallBuilder.create(loc, rewriter, {pointer, stream});
@@ -1050,9 +1042,6 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray(
auto elementPtr = builder.create<LLVM::GEPOp>(
loc, llvmPointerPointerType, llvmPointerType, arrayPtr,
ArrayRef<LLVM::GEPArg>{en.index()});
- if (!getTypeConverter()->useOpaquePointers())
- fieldPtr =
- builder.create<LLVM::BitcastOp>(loc, llvmPointerType, fieldPtr);
builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr);
}
return arrayPtr;
@@ -1079,7 +1068,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant(
std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name));
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ LLVM::Linkage::Internal);
}
// Emits LLVM IR to launch a kernel function. Expects the module that contains
@@ -1170,9 +1159,9 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallString<128> nameBuffer(kernelModule.getName());
nameBuffer.append(kGpuBinaryStorageSuffix);
- Value data = LLVM::createGlobalString(
- loc, rewriter, nameBuffer.str(), binaryAttr.getValue(),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ Value data =
+ LLVM::createGlobalString(loc, rewriter, nameBuffer.str(),
+ binaryAttr.getValue(), LLVM::Linkage::Internal);
// Pass the binary size. SPIRV requires binary size.
auto gpuBlob = binaryAttr.getValue();
@@ -1244,11 +1233,7 @@ static Value bitAndAddrspaceCast(Location loc,
typeConverter.getPointerType(sourceTy.getElementType(),
destinationType.getAddressSpace()),
sourcePtr);
-
- if (typeConverter.useOpaquePointers())
- return sourcePtr;
-
- return rewriter.create<LLVM::BitcastOp>(loc, destinationType, sourcePtr);
+ return sourcePtr;
}
LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
@@ -1366,8 +1351,6 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pTensor =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pTensor = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pTensor);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1457,11 +1440,6 @@ LogicalResult ConvertCreateCooOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType =
llvm::cast<MemRefType>(op.getColIdxs().getType()).getElementType();
Type dType =
@@ -1489,10 +1467,6 @@ LogicalResult ConvertCreateCooAoSOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pIdxs = MemRefDescriptor(adaptor.getIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType = llvm::cast<MemRefType>(op.getIdxs().getType()).getElementType();
Type dType =
llvm::cast<MemRefType>(op.getValues().getType()).getElementType();
@@ -1522,11 +1496,6 @@ LogicalResult ConvertCreateCsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getRowPos().getType()).getElementType();
Type iType =
@@ -1556,8 +1525,6 @@ LogicalResult ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pMat =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pMat = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pMat);
Type dType =
llvm::cast<MemRefType>(op.getMemref().getType()).getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1630,8 +1597,6 @@ LogicalResult ConvertSpMVOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
spMVCallBuilder.create(loc, rewriter,
{modeA, adaptor.getSpmatA(), adaptor.getDnX(),
adaptor.getDnY(), computeType, pBuf, stream});
@@ -1737,8 +1702,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallVector<Value> pBufs;
for (Value buffer : adaptor.getBuffers()) {
Value pBuf = MemRefDescriptor(buffer).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
pBufs.push_back(pBuf);
}
createCuSparseLtSpMMBuilder.create(
@@ -1748,8 +1711,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
} else {
Value pBuf = MemRefDescriptor(adaptor.getBuffers().front())
.allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSpMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getSpmatA(),
adaptor.getDnmatB(), adaptor.getDnmatC(),
@@ -1781,8 +1742,6 @@ LogicalResult ConvertSDDMMOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSDDMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getDnmatA(),
adaptor.getDnmatB(), adaptor.getSpmatC(),
@@ -1837,9 +1796,6 @@ ConvertSpGEMMWorkEstimationOrComputeOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
-
Value bufferSizeNew;
if (adaptor.getKind() ==
@@ -1934,11 +1890,6 @@ LogicalResult ConvertSetCsrPointersOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getCoordinates()).allocatedPtr(rewriter, loc);
Value pVal =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pPos);
- pCrd = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pCrd);
- pVal = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pVal);
- }
createSetCsrPointersBuilder.create(
loc, rewriter, {adaptor.getSpmat(), pPos, pCrd, pVal, stream});
rewriter.replaceOp(op, {stream});
@@ -1959,11 +1910,6 @@ LogicalResult ConvertCreateCscOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getRowIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pColPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColPos);
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getColPos().getType()).getElementType();
Type iType =
@@ -1997,11 +1943,6 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getBColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getBRowPos().getType()).getElementType();
Type iType =
diff --git a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
index 113d49c507e9c8e..f448d35992333b3 100644
--- a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
index 70450656b9df64f..ae8b7aaac7fd944 100644
--- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
// CHECK-LABEL: llvm.func @main
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index 9df110d9b23bacf..1b9afcdf50a17f0 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin use-opaque-pointers=1" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco use-opaque-pointers=1" -split-input-file | FileCheck %s --check-prefix=ROCDL
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" -split-input-file | FileCheck %s --check-prefix=ROCDL
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
index 5c8e6d11934dbba..3f86b0769827957 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index 7e4b1191c5e6c29..aaced31813d574a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index f86d929e0e19acf..d4c0a76088356f5 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
index a828c1d58da5f9e..d15efe354cfa8b4 100644
--- a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
index 9c2edf698548ba8..cba85915b49e43a 100644
--- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir
+++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) {
%c0 = arith.constant 0 : index
diff --git a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir b/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
deleted file mode 100644
index e27162c7dbc1902..000000000000000
--- a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
+++ /dev/null
@@ -1,82 +0,0 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=0' --split-input-file | FileCheck %s
-
-module attributes {gpu.container_module} {
- // CHECK-LABEL: llvm.func @main
- // CHECK-SAME: %[[size:.*]]: i64
- func.func @main(%size : index) {
- // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate()
- %0 = gpu.wait async
- // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
- // CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
- // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]])
- %1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
- // CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
- // CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]]
- // CHECK: llvm.call @mgpuMemFree(%[[void_ptr]], %[[stream]])
- %3 = gpu.dealloc async [%2] %1 : memref<?xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]])
- gpu.wait [%3]
- return
- }
-
- // CHECK: func @foo
- func.func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint
- // CHECK-NOT: llvm.addrspacecast
- // CHECK: %[[src:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f32
- func.func @memset_f32(%dst : memref<7xf32, 1>, %value : f32) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset32(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf32, 1>, f32
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f16
- func.func @memset_f16(%dst : memref<7xf16, 1>, %value : f16) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset16(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf16, 1>, f16
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
More information about the Mlir-commits mailing list