[Mlir-commits] [mlir] [MLIR][NVVM] Add pmevent (PR #134999)
Guray Ozen
llvmlistbot at llvm.org
Wed Apr 9 05:59:46 PDT 2025
https://github.com/grypp created https://github.com/llvm/llvm-project/pull/134999
Add the `nvvm.pmevent` op, which triggers one or more of a fixed number of performance monitor events, with the event index or mask specified by an immediate operand.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent)
>From 73b9df0e4ee059cc66f86fc4227ff682139b82c1 Mon Sep 17 00:00:00 2001
From: Guray Ozen <gozen at nvidia.com>
Date: Wed, 9 Apr 2025 14:58:47 +0200
Subject: [PATCH] [MLIR][NVVM] Add pmevent
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 31 ++++++++++++++++++
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 36 +++++++++++++++++----
mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 21 +++++++++++-
mlir/test/Target/LLVMIR/nvvmir.mlir | 17 ++++++++++
4 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0a6e66919f021..003b393fa5cd5 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -301,6 +301,37 @@ def NVVM_ReduxOp :
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Performance Monitor events
+//===----------------------------------------------------------------------===//
+
+def NVVM_PMEventOp : NVVM_Op<"pmevent">, // Declares the `nvvm.pmevent` operation.
+ Arguments<(ins OptionalAttr<I16Attr>:$maskedEventId, // Optional; spelled `mask = <imm>` in the assembly format.
+ OptionalAttr<I32Attr>:$eventId)> { // Optional; spelled `id = <imm>` in the assembly format.
+ let summary = "Trigger one or more Performance Monitor events.";
+
+ let description = [{
+ Triggers one or more of a fixed number of performance monitor events, with
+ event index or mask specified by immediate operand.
+
+ Without `mask` it triggers a single performance monitor event indexed by
+ immediate operand a, in the range 0..15.
+
+ With `mask` it triggers one or more of the performance monitor events. Each
+ bit in the 16-bit immediate operand a controls an event.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent)
+ }];
+
+ let assemblyFormat = "attr-dict (`id` `=` $eventId^)? (`mask` `=` $maskedEventId^)?"; // Both groups are optional in the syntax.
+
+ string llvmBuilder = [{
+ // TODO
+ }]; // NOTE(review): the builder body is still a placeholder; no LLVM IR is emitted for this op here.
+
+ let hasVerifier = 1; // PMEventOp::verify() requires exactly one of `id`/`mask` and an `id` within 0..15.
+}
+
//===----------------------------------------------------------------------===//
// NVVM Split arrive/wait barrier
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 09bff6101edd3..d926f388429f1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -243,7 +244,8 @@ void MmaOp::print(OpAsmPrinter &p) {
p.printOptionalAttrDict(this->getOperation()->getAttrs(), ignoreAttrNames);
// Print the types of the operands and result.
- p << " : " << "(";
+ p << " : "
+ << "(";
llvm::interleaveComma(SmallVector<Type, 3>{frags[0].regs[0].getType(),
frags[1].regs[0].getType(),
frags[2].regs[0].getType()},
@@ -992,7 +994,9 @@ std::string NVVM::WgmmaMmaAsyncOp::getPtx() {
ss << "},";
// Need to map read/write registers correctly.
regCnt = (regCnt * 2);
- ss << " $" << (regCnt) << "," << " $" << (regCnt + 1) << "," << " p";
+ ss << " $" << (regCnt) << ","
+ << " $" << (regCnt + 1) << ","
+ << " p";
if (getTypeD() != WGMMATypes::s32) {
ss << ", $" << (regCnt + 3) << ", $" << (regCnt + 4);
}
@@ -1219,7 +1223,7 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
: CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile)
#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \
- [&]() -> auto { \
+ [&]() -> auto{ \
switch (dims) { \
case 1: \
return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \
@@ -1234,7 +1238,8 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
default: \
llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
} \
- }()
+ } \
+ ()
llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID(
int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) {
@@ -1364,13 +1369,14 @@ Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
: TCGEN05_CP_IMPL(shape_mc, src_fmt, _cg1)
#define GET_TCGEN05_CP_ID(shape_mc, src_fmt, is_2cta) \
- [&]() -> auto { \
+ [&]() -> auto{ \
if (src_fmt == Tcgen05CpSrcFormat::B6x16_P32) \
return TCGEN05_CP_2CTA(shape_mc, _b6x16_p32, is_2cta); \
if (src_fmt == Tcgen05CpSrcFormat::B4x16_P64) \
return TCGEN05_CP_2CTA(shape_mc, _b4x16_p64, is_2cta); \
return TCGEN05_CP_2CTA(shape_mc, , is_2cta); \
- }()
+ } \
+ ()
llvm::Intrinsic::ID Tcgen05CpOp::getIntrinsicID(Operation &op) {
auto curOp = cast<NVVM::Tcgen05CpOp>(op);
@@ -1536,6 +1542,24 @@ LogicalResult NVVMDialect::verifyRegionArgAttribute(Operation *op,
return success();
}
+/// Verifies `nvvm.pmevent`: exactly one of `id` / `mask` has to be present,
+/// and an `id`, when given, must lie in the range [0, 15].
+LogicalResult PMEventOp::verify() {
+  // Query each optional attribute once instead of on every check.
+  const auto maskAttr = getMaskedEventId();
+  const auto idAttr = getEventId();
+  const bool hasMask = maskAttr.has_value();
+  const bool hasId = idAttr.has_value();
+
+  if (!(hasMask || hasId))
+    return emitOpError() << "either `id` or `mask` must be set";
+
+  if (hasMask && hasId)
+    return emitOpError() << "`id` and `mask` cannot be set at the same time";
+
+  // NOTE(review): if the accessor yields an unsigned value, the lower-bound
+  // comparison can never fire; it is kept to preserve the original checks.
+  if (hasId && (*idAttr < 0 || *idAttr > 15))
+    return emitOpError() << "`id` must be between 0 and 15";
+
+  return llvm::success();
+}
+
//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index f87f11daeef54..4a343be3fb934 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -1,4 +1,23 @@
-// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+// RUN: mlir-opt -verify-diagnostics -split-input-file %s
+
+llvm.func @pmevent_no_id() { // Negative test: the op is written with neither `id` nor `mask`.
+ // expected-error @below {{either `id` or `mask` must be set}}
+ nvvm.pmevent
+}
+
+// -----
+
+llvm.func @pmevent_bigger15() { // Negative test: `id` is above the accepted maximum of 15.
+ // expected-error @below {{`id` must be between 0 and 15}}
+ nvvm.pmevent id = 141
+}
+
+// -----
+
+llvm.func @pmevent_many_ids() { // Negative test: `id` and `mask` are both supplied.
+ // expected-error @below {{`id` and `mask` cannot be set at the same time}}
+ nvvm.pmevent id = 1 mask = 1
+}
// -----
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 3a0713f2feee8..36cfb6be18da7 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -832,6 +832,7 @@ llvm.func @nvvm_match_sync(%mask: i32, %val32: i32, %val64: i64) {
}
// -----
+
// CHECK-LABEL: @nvvm_st_bulk
llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size: i64) {
// CHECK: call void @llvm.nvvm.st.bulk(ptr %{{.*}}, i64 %{{.*}}, i64 0)
@@ -844,3 +845,19 @@ llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size:
nvvm.st.bulk %addr_shared, size = %size, init = 0: !llvm.ptr<3>
llvm.return
}
+
+
+// -----
+
+// CHECK-LABEL: @nvvm_pmevent
+llvm.func @nvvm_pmevent() { // Exercises both accepted forms (`id` at its bounds 0 and 15, and `mask`). NOTE(review): no output patterns are matched for these ops, presumably because the op's llvmBuilder is still a TODO; confirm.
+ nvvm.pmevent id = 0
+
+ nvvm.pmevent id = 15
+
+ nvvm.pmevent mask = 15000
+
+ nvvm.pmevent mask = 10
+
+ llvm.return
+}
More information about the Mlir-commits
mailing list