[Mlir-commits] [mlir] [MLIR][NVGPUToNVVM] Remove typed pointer support (PR #70867)
Christian Ulmann
llvmlistbot at llvm.org
Tue Oct 31 15:35:10 PDT 2023
https://github.com/Dinistro created https://github.com/llvm/llvm-project/pull/70867
This commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect.
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
>From 000f72e6d0502a99cc6e2aef2f8cc218e4b11895 Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christianulmann at gmail.com>
Date: Tue, 31 Oct 2023 23:32:48 +0100
Subject: [PATCH] [MLIR][NVGPUToNVVM] Remove typed pointer support
This commit removes the support for lowering NVGPU to NVVM dialect with
typed pointers. Typed pointers have been deprecated for a while now and
it's planned to soon remove them from the LLVM dialect.
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
---
mlir/include/mlir/Conversion/Passes.td | 5 --
.../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 19 ++----
.../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 2 +-
.../NVGPUToNVVM/typed-pointers.mlir | 59 -------------------
4 files changed, 5 insertions(+), 80 deletions(-)
delete mode 100644 mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 036c9b0039779ab..fb344ebd880e04d 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -823,11 +823,6 @@ def ConvertNVGPUToNVVMPass : Pass<"convert-nvgpu-to-nvvm"> {
let dependentDialects = [
"NVVM::NVVMDialect",
];
- let options = [
- Option<"useOpaquePointers", "use-opaque-pointers", "bool",
- /*default=*/"true", "Generate LLVM IR using opaque pointers "
- "instead of typed pointers">
- ];
}
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index efcde2ba58bd685..1977a571130ed12 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -402,7 +402,6 @@ struct ConvertNVGPUToNVVMPass
void runOnOperation() override {
LowerToLLVMOptions options(&getContext());
- options.useOpaquePointers = useOpaquePointers;
RewritePatternSet patterns(&getContext());
LLVMTypeConverter converter(&getContext(), options);
IRRewriter rewriter(&getContext());
@@ -451,7 +450,7 @@ struct ConvertNVGPUToNVVMPass
nvgpu::getMBarrierMemrefType(rewriter.getContext(), type));
});
converter.addConversion([&](nvgpu::TensorMapDescriptorType type) -> Type {
- return converter.getPointerType(type.getTensor().getElementType());
+ return LLVM::LLVMPointerType::get(type.getContext());
});
populateNVGPUToNVVMConversionPatterns(converter, patterns);
LLVMConversionTarget target(getContext());
@@ -651,16 +650,11 @@ struct NVGPUAsyncCopyLowering
Value dstPtr =
getStridedElementPtr(b.getLoc(), dstMemrefType, adaptor.getDst(),
adaptor.getDstIndices(), rewriter);
- auto i8Ty = IntegerType::get(op.getContext(), 8);
FailureOr<unsigned> dstAddressSpace =
getTypeConverter()->getMemRefAddressSpace(dstMemrefType);
if (failed(dstAddressSpace))
return rewriter.notifyMatchFailure(
loc, "destination memref address space not convertible to integer");
- auto dstPointerType =
- getTypeConverter()->getPointerType(i8Ty, *dstAddressSpace);
- if (!getTypeConverter()->useOpaquePointers())
- dstPtr = b.create<LLVM::BitcastOp>(dstPointerType, dstPtr);
auto srcMemrefType = cast<MemRefType>(op.getSrc().getType());
FailureOr<unsigned> srcAddressSpace =
@@ -671,13 +665,9 @@ struct NVGPUAsyncCopyLowering
Value scrPtr = getStridedElementPtr(loc, srcMemrefType, adaptor.getSrc(),
adaptor.getSrcIndices(), rewriter);
- auto srcPointerType =
- getTypeConverter()->getPointerType(i8Ty, *srcAddressSpace);
- if (!getTypeConverter()->useOpaquePointers())
- scrPtr = b.create<LLVM::BitcastOp>(srcPointerType, scrPtr);
// Intrinsics takes a global pointer so we need an address space cast.
- auto srcPointerGlobalType = getTypeConverter()->getPointerType(
- i8Ty, NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+ auto srcPointerGlobalType = LLVM::LLVMPointerType::get(
+ op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace);
scrPtr = b.create<LLVM::AddrSpaceCastOp>(srcPointerGlobalType, scrPtr);
int64_t dstElements = adaptor.getDstElements().getZExtValue();
int64_t sizeInBytes =
@@ -1128,8 +1118,7 @@ struct NVGPUTmaCreateDescriptorOpLowering
matchAndRewrite(nvgpu::TmaCreateDescriptorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
ImplicitLocOpBuilder b(op->getLoc(), rewriter);
- LLVM::LLVMPointerType llvmPointerType = getTypeConverter()->getPointerType(
- IntegerType::get(op->getContext(), 8));
+ auto llvmPointerType = LLVM::LLVMPointerType::get(op->getContext());
Type llvmInt64Type = IntegerType::get(op->getContext(), 64);
Value tensorElementType =
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 123a661193c4901..745cbdbd5153251 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s -convert-nvgpu-to-nvvm | FileCheck %s
// RUN: mlir-opt %s -transform-interpreter | FileCheck %s
// CHECK-LABEL: @m16n8k16_fp16
diff --git a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir b/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
deleted file mode 100644
index 1a37f1c046cf66d..000000000000000
--- a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: mlir-opt --convert-nvgpu-to-nvvm='use-opaque-pointers=0' --split-input-file %s | FileCheck %s
-
-// CHECK-LABEL: @async_cp(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp(
- %src: memref<128x128xf32>, %dst: memref<3x16x128xf32, 3>, %i : index) {
- // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
- // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<3 x i64>, array<3 x i64>)>
- // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(2048 : index) : i64
- // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
- // CHECK-DAG: %[[S1:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[FI0:.*]] = llvm.mul %[[IDX1]], %[[S1]] : i64
- // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[FI0]] : i64
- // CHECK-DAG: %[[FI2:.*]] = llvm.add %[[FI1]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI2]]] : (!llvm.ptr<f32, 3>, i64) -> !llvm.ptr<f32, 3>
- // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<f32, 3> to !llvm.ptr<i8, 3>
- // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S3:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[FI3:.*]] = llvm.mul %[[IDX1]], %[[S3]] : i64
- // CHECK-DAG: %[[FI4:.*]] = llvm.add %[[FI3]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI4]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
- // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<f32> to !llvm.ptr<i8>
- // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
- // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
- %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 : memref<128x128xf32> to memref<3x16x128xf32, 3>
- // CHECK: nvvm.cp.async.commit.group
- %1 = nvgpu.device_async_create_group %0
- // CHECK: nvvm.cp.async.wait.group 1
- nvgpu.device_async_wait %1 { numGroups = 1 : i32 }
-
- // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg
- %2 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 {bypassL1}: memref<128x128xf32> to memref<3x16x128xf32, 3>
- return
-}
-
-// -----
-
-// CHECK-LABEL: @async_cp_i4(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp_i4(
- %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token {
- // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
- // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4, 3>, ptr<i4, 3>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
- // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr<i4, 3>, i64) -> !llvm.ptr<i4, 3>
- // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<i4, 3> to !llvm.ptr<i8, 3>
- // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4>, ptr<i4>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64
- // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]] : i64
- // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr<i4>, i64) -> !llvm.ptr<i4>
- // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<i4> to !llvm.ptr<i8>
- // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
- // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
- %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3>
- return %0 : !nvgpu.device.async.token
-}
More information about the Mlir-commits
mailing list