[Mlir-commits] [mlir] [MLIR][NVVM] Add support for st.bulk Op (PR #131727)
Srinivasa Ravi
llvmlistbot at llvm.org
Tue Mar 18 03:30:47 PDT 2025
https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/131727
>From 6644c58ebf02876835f0def939b9199436dbc1f6 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Wed, 26 Feb 2025 15:20:40 +0530
Subject: [PATCH] [MLIR][NVVM] Add support for st.bulk Op
This change adds the `st.bulk` NVVM Op for the `st.bulk` instruction
introduced in ptx8.6 for sm_100.
PTX Spec Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-st-bulk
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 30 +++++++++++++++++++
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 6 ++++
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 9 ++++++
mlir/test/Dialect/LLVMIR/nvvm.mlir | 9 ++++++
mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 8 +++++
mlir/test/Target/LLVMIR/nvvmir.mlir | 10 +++++++
6 files changed, 72 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index ff6696f6bec40..8fd555b91f6e7 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2583,6 +2583,36 @@ def NVVM_MapaOp: NVVM_Op<"mapa",
let assemblyFormat = "$a`,` $b attr-dict `:` type($a) `->` type($res)";
}
+//===----------------------------------------------------------------------===//
+// NVVM Bulk Store Op
+//===----------------------------------------------------------------------===//
+
+def NVVM_BulkStoreOp: NVVM_Op<"st.bulk"> {
+ let arguments = (ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr, I64:$size, I64Attr:$initVal);
+
+ let summary = "Bulk Store Op";
+ let description = [{
+ Initializes a region of shared memory at the address given by `addr`.
+ The `size` operand specifies the number of bytes to initialize and must be
+ a multiple of 8.
+ The `initVal` operand specifies the value to initialize the memory to. The
+ only supported value is 0.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-st-bulk)
+ }];
+
+ string llvmBuilder = [{
+ auto intId = getStBulkIntrinsicId(
+ llvm::cast<LLVM::LLVMPointerType>(op.getAddr().getType()));
+ createIntrinsicCall(builder, intId,
+ {$addr, $size, builder.getInt64($initVal)});
+ }];
+
+ let assemblyFormat = "$addr`,` $size`,` $initVal attr-dict `:` type($addr)";
+
+ let hasVerifier = 1;
+}
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 8f080a2d597a5..4d86424679b6f 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -160,6 +160,12 @@ LogicalResult CvtFloatToTF32Op::verify() {
return success();
}
+LogicalResult BulkStoreOp::verify() {
+ if (getInitVal() != 0)
+ return emitOpError("only 0 is supported for initVal in st.bulk");
+ return success();
+}
+
// Given the element type of an operand and whether or not it is an accumulator,
// this function returns the PTX type (`NVVM::MMATypes`) that corresponds to the
// operand's element type.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index c3a129a82688f..bedac8a7c17b4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -135,6 +135,15 @@ static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout,
}
}
+/// Return the intrinsic ID associated with st.bulk for the given address type.
+static llvm::Intrinsic::ID
+getStBulkIntrinsicId(LLVM::LLVMPointerType addrType) {
+ bool isSharedMemory =
+ addrType.getAddressSpace() == NVVM::NVVMMemorySpace::kSharedMemorySpace;
+ return isSharedMemory ? llvm::Intrinsic::nvvm_st_bulk_shared_cta
+ : llvm::Intrinsic::nvvm_st_bulk;
+}
+
static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
NVVM::ProxyKind toProxy,
NVVM::MemScopeKind scope,
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 85998d4e66254..d61c4d0a965b5 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -550,6 +550,15 @@ func.func @mapa(%a: !llvm.ptr, %a_shared: !llvm.ptr<3>, %b : i32) {
return
}
+// CHECK-LABEL: @st_bulk
+func.func @st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size: i64) {
+ // CHECK: nvvm.st.bulk %{{.*}}, %{{.*}}, 0 : !llvm.ptr
+ nvvm.st.bulk %addr_gen, %size, 0 : !llvm.ptr
+ // CHECK: nvvm.st.bulk %{{.*}}, %{{.*}}, 0 : !llvm.ptr<3>
+ nvvm.st.bulk %addr_shared, %size, 0 : !llvm.ptr<3>
+ return
+}
+
// -----
// Just check these don't emit errors.
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 4fca7fd801dbe..e138dd22c1f82 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -125,6 +125,14 @@ llvm.func @convert_float_to_tf32_no_rnd_mode(%src : f32) -> i32 {
// -----
+llvm.func @nvvm_st_bulk_initval_nonzero(%addr : !llvm.ptr, %size : i64) {
+ // expected-error @below {{only 0 is supported for initVal in st.bulk}}
+ nvvm.st.bulk %addr, %size, 1 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
llvm.func @nvvm_tcgen05_cp_128x256b_mc(%taddr : !llvm.ptr<6>, %smem_desc : i64) {
// expected-error @below {{Invalid multicast type for tcgen05.cp Op}}
nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape<shape_128x256b>, multicast = #nvvm.tcgen05_cp_multicast<warpx2_02_13>}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index f39aca95b918f..07bcdcd369909 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -810,3 +810,13 @@ llvm.func @nvvm_redux_sync_f32(%value: f32, %offset: i32) {
%7 = nvvm.redux.sync fmax %value, %offset {abs = true, nan = true}: f32 -> f32
llvm.return
}
+
+// -----
+// CHECK-LABEL: @nvvm_st_bulk
+llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size: i64) {
+ // CHECK: call void @llvm.nvvm.st.bulk(ptr %{{.*}}, i64 %{{.*}}, i64 0)
+ nvvm.st.bulk %addr_gen, %size, 0 : !llvm.ptr
+ // CHECK: call void @llvm.nvvm.st.bulk.shared.cta(ptr addrspace(3) %{{.*}}, i64 %{{.*}}, i64 0)
+ nvvm.st.bulk %addr_shared, %size, 0: !llvm.ptr<3>
+ llvm.return
+}
More information about the Mlir-commits
mailing list