[Mlir-commits] [mlir] 39cdefb - [mlir][nvvm] Add prefetch.tensormap (#67564)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Oct 17 04:03:42 PDT 2023
Author: Guray Ozen
Date: 2023-10-17T13:03:37+02:00
New Revision: 39cdefb5b52b3786993bca243d589de19896fca1
URL: https://github.com/llvm/llvm-project/commit/39cdefb5b52b3786993bca243d589de19896fca1
DIFF: https://github.com/llvm/llvm-project/commit/39cdefb5b52b3786993bca243d589de19896fca1.diff
LOG: [mlir][nvvm] Add prefetch.tensormap (#67564)
This PR adds `prefetch.tensormap` Op. It brings the cache line
containing the given tma descriptor for subsequent use by the
cp.async.bulk.tensor instruction.
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prefetch-prefetchu
Added:
Modified:
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td
mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index d550fe1f33140ed..cefdd7cc4033a11 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1438,6 +1438,17 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
let hasVerifier = 1;
}
+def NVVM_PrefetchTensorMapOp : NVVM_Op<"prefetch.tensormap",
+ [DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>,
+ Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, PtxPredicate:$predicate)> {
+ let assemblyFormat = "$tmaDescriptor (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";
+ let extraClassDefinition = [{
+ std::string $cppClass::getPtx() {
+ return std::string("prefetch.tensormap [%0];");
+ }
+ }];
+}
+
//===----------------------------------------------------------------------===//
// NVVM Wgmma Ops
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td
index fd16376be366912..dd00355b6d77e33 100644
--- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td
+++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td
@@ -619,6 +619,18 @@ def NVGPU_MBarrierTryWaitParityOp : NVGPU_Op<"mbarrier.try_wait.parity", []> {
let assemblyFormat = "$barriers `[` $mbarId `]` `,` $phase `,` $ticks attr-dict `:` type($barriers)";
}
+def NVGPU_TmaPrefetchOp : NVGPU_Op<"tma.prefetch.descriptor", []> {
+ let summary = "Prefetch given `nvgpu.tensormap.descriptor` ";
+ let description = [{
+ The Op brings the cache line containing the given `$tmaDescriptor` for
+ subsequent use by the `tma.async.load` instruction.
+ }];
+ let arguments = (ins NVGPU_TensorMapDescriptor:$tensorMapDescriptor, Optional<I1>:$predicate);
+ let assemblyFormat = [{
+ $tensorMapDescriptor (`,` $predicate^)? attr-dict `:` type($tensorMapDescriptor)
+ }];
+}
+
def NVGPU_TmaAsyncLoadOp : NVGPU_Op<"tma.async.load", []> {
let summary = "TMA asynchronous load";
let description = [{
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index 029659a2f855416..7eb6f42d2788e35 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -1610,6 +1610,18 @@ struct NVGPUWarpgroupMmaInitAccumulatorOpLowering
}
};
+struct NVGPUTmaPrefetchOpLowering
+ : public ConvertOpToLLVMPattern<nvgpu::TmaPrefetchOp> {
+ using ConvertOpToLLVMPattern<nvgpu::TmaPrefetchOp>::ConvertOpToLLVMPattern;
+ LogicalResult
+ matchAndRewrite(nvgpu::TmaPrefetchOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ rewriter.replaceOpWithNewOp<NVVM::PrefetchTensorMapOp>(
+ op, adaptor.getTensorMapDescriptor(), adaptor.getPredicate());
+ return success();
+ }
+};
+
} // namespace
void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter,
@@ -1623,6 +1635,7 @@ void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter,
NVGPUMBarrierTryWaitParityLowering, // nvgpu.mbarrier.try_wait_parity
NVGPUTmaAsyncLoadOpLowering, // nvgpu.tma.async.load
NVGPUTmaCreateDescriptorOpLowering, // nvgpu.tma.create.descriptor
+ NVGPUTmaPrefetchOpLowering, // nvgpu.tma.prefetch.descriptor
NVGPUMBarrierArriveExpectTxLowering, // nvgpu.mbarrier.arrive.expect_tx
NVGPUGenerateWarpgroupDescriptorLowering, // nvgpu.warpgroup.generate.descriptor
NVGPUWarpgroupMmaOpLowering, // nvgpu.warpgroup.mma
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index bf660e2683158e5..8971585e03c7add 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -643,6 +643,17 @@ func.func @create_tensor_map(%devicePtr2d : memref<64x128xf32>, %devicePtr1d : m
func.return
}
+// CHECK-LABEL: @tma_prefetch(
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !nvgpu.tensormap.descriptor<tensor = memref<128xf32, 3>, swizzle = none, l2promo = none, oob = nan, interleave = none>, %[[arg1:[a-zA-Z0-9_]+]]: i1
+func.func @tma_prefetch(%tensorMap1d: !tensorMap1d, %p : i1) {
+ // CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.tensormap.descriptor<tensor = memref<128xf32, 3>, swizzle = none, l2promo = none, oob = nan, interleave = none> to !llvm.ptr
+ // CHECK: nvvm.prefetch.tensormap %[[S0]] : !llvm.ptr
+ nvgpu.tma.prefetch.descriptor %tensorMap1d: !tensorMap1d
+ // CHECK: nvvm.prefetch.tensormap %[[S0]], predicate = %[[arg1]] : !llvm.ptr, i1
+ nvgpu.tma.prefetch.descriptor %tensorMap1d, %p: !tensorMap1d
+ func.return
+}
+
!lhsTensorMap = !nvgpu.tensormap.descriptor<tensor = memref<128x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
!rhsTensorMap = !nvgpu.tensormap.descriptor<tensor = memref<64x128xf16, strided<[128, 1], offset: 8192>, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index fcc882f562a4a95..0d0ac9637438a95 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -504,3 +504,14 @@ func.func @elect_one_leader_sync() {
%cnd = nvvm.elect.sync -> i1
return
}
+
+// -----
+
+// CHECK-LABEL: @init_mbarrier_arrive_expect_tx
+llvm.func @init_mbarrier_arrive_expect_tx(%desc : !llvm.ptr, %pred : i1) {
+ //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "prefetch.tensormap [$0];", "l"
+ nvvm.prefetch.tensormap %desc : !llvm.ptr
+ //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$1 prefetch.tensormap [$0];", "l,b"
+ nvvm.prefetch.tensormap %desc, predicate = %pred : !llvm.ptr, i1
+ llvm.return
+}
More information about the Mlir-commits
mailing list