[Mlir-commits] [mlir] [mlir][nvvm] Add prefetch.tensormap (PR #67564)

Wed Sep 27 07:52:47 PDT 2023

https://github.com/grypp created https://github.com/llvm/llvm-project/pull/67564

This PR adds `prefetch.tensormap` Op. It brings the cache line containing the given tma descriptor for subsequent use by the cp.async.bulk.tensor instruction.

https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prefetch-prefetchu

>From e9ba8ade6a5f3ed27207fae53bc1880517c2202f Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Wed, 27 Sep 2023 16:51:38 +0200
Subject: [PATCH] [mlir][nvvm] Add prefetch.tensormap

This PR adds `prefetch.tensormap` Op. It brings the cache line containing the given tma descriptor for subsequent use by the cp.async.bulk.tensor instruction.

https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prefetch-prefetchu
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td       | 15 +++++++++++++++
 mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 11 +++++++++++
 2 files changed, 26 insertions(+)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0d4d734edd2b69b..e9c52c06ed27ebd 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1512,6 +1512,21 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : NVVM_Op<"cp.async.bulk.tensor.gl
   let hasVerifier = 1;
 }
 
+def NVVM_PrefetchTensorMapOp : NVVM_Op<"prefetch.tensormap",
+                    [DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>,
+  Arguments<(ins LLVM_i64ptr_any:$tmaDescriptor, PtxPredicate:$predicate)> {
+  let description = [{
+    The Op brings the cache line containing the given $tmaDescriptor for 
+    subsequent use by the `cp.async.bulk.tensor` instruction.
+  }];
+  let assemblyFormat = "$tmaDescriptor (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";
+  let extraClassDefinition = [{
+    std::string $cppClass::getPtx() { 
+      return std::string("prefetch.tensormap [%0];");
+    }
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // NVVM Wgmma Ops
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index 7ffe1ad2bb2b111..8ff8868e96ace11 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -363,3 +363,14 @@ func.func @wgmma_f32_e5m2_e4m3(%descA : i64, %descB : i64) -> !mat32f32 {
       : !mat32f32 -> !mat32f32
   return %result2 : !mat32f32
 }
+
+// -----
+
+// CHECK-LABEL: @init_mbarrier_arrive_expect_tx
+llvm.func @init_mbarrier_arrive_expect_tx(%desc : !llvm.ptr, %pred : i1) {
+  //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "prefetch.tensormap [$0];", "l"
+  nvvm.prefetch.tensormap %desc : !llvm.ptr
+  //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$1 prefetch.tensormap [$0];", "l,b"
+  nvvm.prefetch.tensormap %desc, predicate = %pred : !llvm.ptr, i1
+  llvm.return
+}