[Mlir-commits] [mlir] dbd4a0d - [MLIR][GPUCommon] Remove typed pointer support (#70735)
llvmlistbot at llvm.org
Tue Oct 31 01:22:48 PDT 2023
Author: Christian Ulmann
Date: 2023-10-31T09:22:44+01:00
New Revision: dbd4a0dd38eb03df4f7d55c780b3dd6cb15a270d
URL: https://github.com/llvm/llvm-project/commit/dbd4a0dd38eb03df4f7d55c780b3dd6cb15a270d
DIFF: https://github.com/llvm/llvm-project/commit/dbd4a0dd38eb03df4f7d55c780b3dd6cb15a270d.diff
LOG: [MLIR][GPUCommon] Remove typed pointer support (#70735)
This commit removes GPUCommon's lowering support for typed pointers.
Typed pointers have been deprecated for a while now, and they are planned
to be removed from the LLVM dialect soon.
Related PSA:
https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
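As an illustration (not part of the commit; the input mirrors the removed
typed-pointers.mlir test shown at the end of the diff), running
`mlir-opt --gpu-to-llvm` on code like the following now lowers every GPU
runtime pointer argument to an opaque !llvm.ptr directly, without the
llvm.bitcast operations that the typed-pointer path used to insert:

  module attributes {gpu.container_module} {
    func.func @main(%size : index) {
      %t0 = gpu.wait async
      // Lowers to roughly:
      //   %ptr = llvm.call @mgpuMemAlloc(...) : (...) -> !llvm.ptr
      // instead of the former typed-pointer form, which additionally emitted:
      //   %typed = llvm.bitcast %ptr : !llvm.ptr<i8> to !llvm.ptr<f32>
      %mem, %t1 = gpu.alloc async [%t0] (%size) : memref<?xf32>
      %t2 = gpu.dealloc async [%t1] %mem : memref<?xf32>
      gpu.wait [%t2]
      return
    }
  }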
Added:
Modified:
mlir/include/mlir/Conversion/Passes.td
mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
mlir/test/Conversion/GPUCommon/transfer_write.mlir
Removed:
mlir/test/Conversion/GPUCommon/typed-pointers.mlir
################################################################################
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index ba7dc642af2a079..036c9b0039779ab 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -449,10 +449,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
/*default=*/"gpu::getDefaultGpuBinaryAnnotation()",
"Annotation attribute string for GPU binary"
- >,
- Option<"useOpaquePointers", "use-opaque-pointers", "bool",
- /*default=*/"true", "Generate LLVM IR using opaque pointers "
- "instead of typed pointers">,
+ >
];
let dependentDialects = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 447e3c9a59e5c0a..bbed1ea5cf62204 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -210,7 +210,7 @@ class GEPIndicesAdaptor {
/// string (operations inserted at the builder insertion point).
Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
StringRef value, Linkage linkage,
- bool useOpaquePointers);
+ bool useOpaquePointers = true);
/// LLVM requires some operations to be inside of a Module operation. This
/// function confirms that the Operation has the desired properties.
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 12bd02050be036c..7bac8f5a8f0e03b 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -82,19 +82,12 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
MLIRContext *context = &this->getTypeConverter()->getContext();
Type llvmVoidType = LLVM::LLVMVoidType::get(context);
- LLVM::LLVMPointerType llvmPointerType =
- this->getTypeConverter()->getPointerType(IntegerType::get(context, 8));
- Type llvmPointerPointerType =
- this->getTypeConverter()->getPointerType(llvmPointerType);
+ LLVM::LLVMPointerType llvmPointerType = LLVM::LLVMPointerType::get(context);
Type llvmInt8Type = IntegerType::get(context, 8);
Type llvmInt16Type = IntegerType::get(context, 16);
Type llvmInt32Type = IntegerType::get(context, 32);
Type llvmInt64Type = IntegerType::get(context, 64);
Type llvmFloat32Type = Float32Type::get(context);
- Type llvmInt8PointerType =
- this->getTypeConverter()->getPointerType(llvmInt8Type);
- Type llvmInt64PointerType =
- this->getTypeConverter()->getPointerType(llvmInt64Type);
Type llvmIntPtrType = IntegerType::get(
context, this->getTypeConverter()->getPointerBitwidth(0));
@@ -115,18 +108,18 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
"mgpuLaunchKernel",
llvmVoidType,
{
- llvmPointerType, /* void* f */
- llvmIntPtrType, /* intptr_t gridXDim */
- llvmIntPtrType, /* intptr_t gridyDim */
- llvmIntPtrType, /* intptr_t gridZDim */
- llvmIntPtrType, /* intptr_t blockXDim */
- llvmIntPtrType, /* intptr_t blockYDim */
- llvmIntPtrType, /* intptr_t blockZDim */
- llvmInt32Type, /* unsigned int sharedMemBytes */
- llvmPointerType, /* void *hstream */
- llvmPointerPointerType, /* void **kernelParams */
- llvmPointerPointerType, /* void **extra */
- llvmInt64Type /* size_t paramsCount */
+ llvmPointerType, /* void* f */
+ llvmIntPtrType, /* intptr_t gridXDim */
+ llvmIntPtrType, /* intptr_t gridyDim */
+ llvmIntPtrType, /* intptr_t gridZDim */
+ llvmIntPtrType, /* intptr_t blockXDim */
+ llvmIntPtrType, /* intptr_t blockYDim */
+ llvmIntPtrType, /* intptr_t blockZDim */
+ llvmInt32Type, /* unsigned int sharedMemBytes */
+ llvmPointerType, /* void *hstream */
+ llvmPointerType, /* void **kernelParams */
+ llvmPointerType, /* void **extra */
+ llvmInt64Type /* size_t paramsCount */
}};
FunctionCallBuilder streamCreateCallBuilder = {
"mgpuStreamCreate", llvmPointerType /* void *stream */, {}};
@@ -588,7 +581,6 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp)
void GpuToLLVMConversionPass::runOnOperation() {
LowerToLLVMOptions options(&getContext());
- options.useOpaquePointers = useOpaquePointers;
options.useBarePtrCallConv = hostBarePtrCallConv;
LLVMTypeConverter converter(&getContext(), options);
@@ -835,8 +827,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
- Type elementPtrType = this->getElementPtrType(memRefType);
-
auto nullPtr = rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPointerType);
Value stream = adaptor.getAsyncDependencies().empty()
? nullPtr
@@ -848,9 +838,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value allocatedPtr =
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream, isHostShared})
.getResult();
- if (!getTypeConverter()->useOpaquePointers())
- allocatedPtr =
- rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, allocatedPtr);
// No alignment.
Value alignedPtr = allocatedPtr;
@@ -880,8 +867,6 @@ LogicalResult ConvertDeallocOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pointer =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pointer = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pointer);
Value stream = adaptor.getAsyncDependencies().front();
deallocCallBuilder.create(loc, rewriter, {pointer, stream});
@@ -1035,24 +1020,21 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray(
auto structType = LLVM::LLVMStructType::getNewIdentified(context, StringRef(),
argumentTypes);
auto one = builder.create<LLVM::ConstantOp>(loc, llvmInt32Type, 1);
- auto structPtr = builder.create<LLVM::AllocaOp>(
- loc, getTypeConverter()->getPointerType(structType), structType, one,
- /*alignment=*/0);
+ auto structPtr =
+ builder.create<LLVM::AllocaOp>(loc, llvmPointerType, structType, one,
+ /*alignment=*/0);
auto arraySize =
builder.create<LLVM::ConstantOp>(loc, llvmInt32Type, numArguments);
auto arrayPtr = builder.create<LLVM::AllocaOp>(
- loc, llvmPointerPointerType, llvmPointerType, arraySize, /*alignment=*/0);
+ loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0);
for (const auto &en : llvm::enumerate(arguments)) {
- Value fieldPtr = builder.create<LLVM::GEPOp>(
- loc, getTypeConverter()->getPointerType(argumentTypes[en.index()]),
- structType, structPtr, ArrayRef<LLVM::GEPArg>{0, en.index()});
+ Value fieldPtr =
+ builder.create<LLVM::GEPOp>(loc, llvmPointerType, structType, structPtr,
+ ArrayRef<LLVM::GEPArg>{0, en.index()});
builder.create<LLVM::StoreOp>(loc, en.value(), fieldPtr);
auto elementPtr = builder.create<LLVM::GEPOp>(
- loc, llvmPointerPointerType, llvmPointerType, arrayPtr,
+ loc, llvmPointerType, llvmPointerType, arrayPtr,
ArrayRef<LLVM::GEPArg>{en.index()});
- if (!getTypeConverter()->useOpaquePointers())
- fieldPtr =
- builder.create<LLVM::BitcastOp>(loc, llvmPointerType, fieldPtr);
builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr);
}
return arrayPtr;
@@ -1079,7 +1061,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant(
std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name));
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ LLVM::Linkage::Internal);
}
// Emits LLVM IR to launch a kernel function. Expects the module that contains
@@ -1170,9 +1152,9 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallString<128> nameBuffer(kernelModule.getName());
nameBuffer.append(kGpuBinaryStorageSuffix);
- Value data = LLVM::createGlobalString(
- loc, rewriter, nameBuffer.str(), binaryAttr.getValue(),
- LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers());
+ Value data =
+ LLVM::createGlobalString(loc, rewriter, nameBuffer.str(),
+ binaryAttr.getValue(), LLVM::Linkage::Internal);
// Pass the binary size. SPIRV requires binary size.
auto gpuBlob = binaryAttr.getValue();
@@ -1205,7 +1187,7 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
: adaptor.getAsyncDependencies().front();
// Create array of pointers to kernel arguments.
auto kernelParams = generateParamsArray(launchOp, adaptor, rewriter);
- auto nullpointer = rewriter.create<LLVM::ZeroOp>(loc, llvmPointerPointerType);
+ auto nullpointer = rewriter.create<LLVM::ZeroOp>(loc, llvmPointerType);
Value dynamicSharedMemorySize = launchOp.getDynamicSharedMemorySize()
? launchOp.getDynamicSharedMemorySize()
: zero;
@@ -1241,14 +1223,10 @@ static Value bitAndAddrspaceCast(Location loc,
if (destinationType.getAddressSpace() != sourceTy.getAddressSpace())
sourcePtr = rewriter.create<LLVM::AddrSpaceCastOp>(
loc,
- typeConverter.getPointerType(sourceTy.getElementType(),
- destinationType.getAddressSpace()),
+ LLVM::LLVMPointerType::get(rewriter.getContext(),
+ destinationType.getAddressSpace()),
sourcePtr);
-
- if (typeConverter.useOpaquePointers())
- return sourcePtr;
-
- return rewriter.create<LLVM::BitcastOp>(loc, destinationType, sourcePtr);
+ return sourcePtr;
}
LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
@@ -1366,8 +1344,6 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pTensor =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pTensor = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pTensor);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1388,7 +1364,7 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
auto handleSz = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(11032));
handle = rewriter.create<LLVM::AllocaOp>(
- loc, llvmInt8PointerType, llvmInt8Type, handleSz, /*alignment=*/16);
+ loc, llvmPointerType, llvmInt8Type, handleSz, /*alignment=*/16);
handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
createLtDnMatCallBuilder
@@ -1457,11 +1433,6 @@ LogicalResult ConvertCreateCooOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType =
llvm::cast<MemRefType>(op.getColIdxs().getType()).getElementType();
Type dType =
@@ -1489,10 +1460,6 @@ LogicalResult ConvertCreateCooAoSOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pIdxs = MemRefDescriptor(adaptor.getIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type iType = llvm::cast<MemRefType>(op.getIdxs().getType()).getElementType();
Type dType =
llvm::cast<MemRefType>(op.getValues().getType()).getElementType();
@@ -1522,11 +1489,6 @@ LogicalResult ConvertCreateCsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getRowPos().getType()).getElementType();
Type iType =
@@ -1556,8 +1518,6 @@ LogicalResult ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pMat =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pMat = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pMat);
Type dType =
llvm::cast<MemRefType>(op.getMemref().getType()).getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
@@ -1566,7 +1526,7 @@ LogicalResult ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite(
auto handleSz = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(44104));
Value handle = rewriter.create<LLVM::AllocaOp>(
- loc, llvmInt8PointerType, llvmInt8Type, handleSz, /*alignment=*/16);
+ loc, llvmPointerType, llvmInt8Type, handleSz, /*alignment=*/16);
handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
create2To4SpMatCallBuilder
@@ -1630,8 +1590,6 @@ LogicalResult ConvertSpMVOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
spMVCallBuilder.create(loc, rewriter,
{modeA, adaptor.getSpmatA(), adaptor.getDnX(),
adaptor.getDnY(), computeType, pBuf, stream});
@@ -1658,7 +1616,7 @@ LogicalResult ConvertSpMMBufferSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
auto three = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(3));
auto bufferSize = rewriter.create<LLVM::AllocaOp>(
- loc, llvmInt64PointerType, llvmInt64Type, three, /*alignment=*/16);
+ loc, llvmPointerType, llvmPointerType, three, /*alignment=*/16);
createCuSparseLtSpMMBufferSizeBuilder
.create(loc, rewriter,
{bufferSize, modeA, modeB, adaptor.getSpmatA(),
@@ -1667,11 +1625,11 @@ LogicalResult ConvertSpMMBufferSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
.getResult();
auto bufferSizePtr1 = rewriter.create<LLVM::GEPOp>(
- loc, llvmInt64PointerType, llvmInt64PointerType, bufferSize,
+ loc, llvmPointerType, llvmPointerType, bufferSize,
ValueRange{rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(1))});
auto bufferSizePtr2 = rewriter.create<LLVM::GEPOp>(
- loc, llvmInt64PointerType, llvmInt64PointerType, bufferSize,
+ loc, llvmPointerType, llvmPointerType, bufferSize,
ValueRange{rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(2))});
auto bufferSize0 =
@@ -1737,8 +1695,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallVector<Value> pBufs;
for (Value buffer : adaptor.getBuffers()) {
Value pBuf = MemRefDescriptor(buffer).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
pBufs.push_back(pBuf);
}
createCuSparseLtSpMMBuilder.create(
@@ -1748,8 +1704,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
} else {
Value pBuf = MemRefDescriptor(adaptor.getBuffers().front())
.allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSpMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getSpmatA(),
adaptor.getDnmatB(), adaptor.getDnmatC(),
@@ -1762,8 +1716,7 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite(
template <typename T>
static void addOpaquePointerConversion(LLVMTypeConverter &converter) {
converter.addConversion([&converter](T) -> Type {
- return converter.getPointerType(
- IntegerType::get(&converter.getContext(), 8));
+ return LLVM::LLVMPointerType::get(&converter.getContext());
});
}
@@ -1781,8 +1734,6 @@ LogicalResult ConvertSDDMMOpToGpuRuntimeCallPattern::matchAndRewrite(
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
createSDDMMCallBuilder.create(loc, rewriter,
{modeA, modeB, adaptor.getDnmatA(),
adaptor.getDnmatB(), adaptor.getSpmatC(),
@@ -1837,9 +1788,6 @@ ConvertSpGEMMWorkEstimationOrComputeOpToGpuRuntimeCallPattern::matchAndRewrite(
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers())
- pBuf = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pBuf);
-
Value bufferSizeNew;
if (adaptor.getKind() ==
@@ -1896,18 +1844,18 @@ LogicalResult ConvertSpMatGetSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
auto three = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(3));
auto buffer = rewriter.create<LLVM::AllocaOp>(
- loc, llvmInt64PointerType, llvmInt64Type, three, /*alignment=*/16);
+ loc, llvmPointerType, llvmInt64Type, three, /*alignment=*/16);
auto rowsPtr = rewriter.create<LLVM::GEPOp>(
- loc, llvmInt64PointerType, llvmInt64PointerType, buffer,
+ loc, llvmPointerType, llvmPointerType, buffer,
ValueRange{rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(0))});
auto colsPtr = rewriter.create<LLVM::GEPOp>(
- loc, llvmInt64PointerType, llvmInt64PointerType, buffer,
+ loc, llvmPointerType, llvmPointerType, buffer,
ValueRange{rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(1))});
auto nnzsPtr = rewriter.create<LLVM::GEPOp>(
- loc, llvmInt64PointerType, llvmInt64PointerType, buffer,
+ loc, llvmPointerType, llvmPointerType, buffer,
ValueRange{rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(2))});
createSpMatGetSizeBuilder.create(
@@ -1934,11 +1882,6 @@ LogicalResult ConvertSetCsrPointersOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getCoordinates()).allocatedPtr(rewriter, loc);
Value pVal =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pPos);
- pCrd = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pCrd);
- pVal = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pVal);
- }
createSetCsrPointersBuilder.create(
loc, rewriter, {adaptor.getSpmat(), pPos, pCrd, pVal, stream});
rewriter.replaceOp(op, {stream});
@@ -1959,11 +1902,6 @@ LogicalResult ConvertCreateCscOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getRowIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pColPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColPos);
- pRowIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getColPos().getType()).getElementType();
Type iType =
@@ -1997,11 +1935,6 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite(
MemRefDescriptor(adaptor.getBColIdxs()).allocatedPtr(rewriter, loc);
Value pValues =
MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc);
- if (!getTypeConverter()->useOpaquePointers()) {
- pRowPos = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pRowPos);
- pColIdxs = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pColIdxs);
- pValues = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pValues);
- }
Type pType =
llvm::cast<MemRefType>(op.getBRowPos().getType()).getElementType();
Type iType =
diff --git a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
index 113d49c507e9c8e..f448d35992333b3 100644
--- a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
index 70450656b9df64f..ae8b7aaac7fd944 100644
--- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
// CHECK-LABEL: llvm.func @main
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index 9df110d9b23bacf..1b9afcdf50a17f0 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin use-opaque-pointers=1" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco use-opaque-pointers=1" -split-input-file | FileCheck %s --check-prefix=ROCDL
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" -split-input-file | FileCheck %s --check-prefix=ROCDL
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
index 5c8e6d11934dbba..3f86b0769827957 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index 7e4b1191c5e6c29..aaced31813d574a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index f86d929e0e19acf..d4c0a76088356f5 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
index a828c1d58da5f9e..d15efe354cfa8b4 100644
--- a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
index 9c2edf698548ba8..cba85915b49e43a 100644
--- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir
+++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) {
%c0 = arith.constant 0 : index
diff --git a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir b/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
deleted file mode 100644
index e27162c7dbc1902..000000000000000
--- a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir
+++ /dev/null
@@ -1,82 +0,0 @@
-// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=0' --split-input-file | FileCheck %s
-
-module attributes {gpu.container_module} {
- // CHECK-LABEL: llvm.func @main
- // CHECK-SAME: %[[size:.*]]: i64
- func.func @main(%size : index) {
- // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate()
- %0 = gpu.wait async
- // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
- // CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
- // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]])
- %1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
- // CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
- // CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]]
- // CHECK: llvm.call @mgpuMemFree(%[[void_ptr]], %[[stream]])
- %3 = gpu.dealloc async [%2] %1 : memref<?xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]])
- gpu.wait [%3]
- return
- }
-
- // CHECK: func @foo
- func.func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint
- // CHECK-NOT: llvm.addrspacecast
- // CHECK: %[[src:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32>
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f32
- func.func @memset_f32(%dst : memref<7xf32, 1>, %value : f32) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset32(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf32, 1>, f32
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f16
- func.func @memset_f16(%dst : memref<7xf16, 1>, %value : f16) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset16(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf16, 1>, f16
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}