[Mlir-commits] [mlir] [MLIR][NVGPU] Add mbarrier.get Op (PR #133221)
Guray Ozen
llvmlistbot at llvm.org
Thu Mar 27 01:57:09 PDT 2025
https://github.com/grypp created https://github.com/llvm/llvm-project/pull/133221
The `mbarrier.create` op can create multiple mbarrier objects, and other mbarrier-related ops can access an mbarrier using a dynamic SSA value. This is especially useful when using mbarriers in dynamic loops.
This PR adds the `mbarrier.get` op, which returns a pointer to a specific mbarrier object from a group of barriers created by the nvgpu.mbarrier.create operation. It is useful when composing the NVGPU and NVVM dialects.
Example:
```
%mbars = nvgpu.mbarrier.create
-> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2]
: !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
-> i32
```
>From b552b37bfc025c006a27b7b28b5ffc22bbf39e3b Mon Sep 17 00:00:00 2001
From: Guray Ozen <gozen at nvidia.com>
Date: Thu, 27 Mar 2025 09:54:41 +0100
Subject: [PATCH] [MLIR][NVGPU] Add mbarrier.get Op
The `mbarrier.create` op can create multiple mbarrier objects, and other mbarrier-related ops can access an mbarrier using a dynamic SSA value. This is especially useful when using mbarriers in dynamic loops.
This PR adds the `mbarrier.get` op, which returns a pointer to a specific mbarrier object from a group of barriers created by the nvgpu.mbarrier.create operation. It is useful when composing the NVGPU and NVVM dialects.
Example:
```
%mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10> -> i32
```
---
.../include/mlir/Dialect/NVGPU/IR/NVGPUOps.td | 19 ++++++++++++++++
.../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 19 ++++++++++++++++
.../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 22 +++++++++++++++++++
3 files changed, 60 insertions(+)
diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
index eb0fb90d271ed..ec68364d47e4f 100644
--- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
+++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
@@ -322,6 +322,25 @@ def NVGPU_MBarrierCreateOp : NVGPU_Op<"mbarrier.create", []> {
}];
}
+def NVGPU_MBarrierGetOp : NVGPU_Op<"mbarrier.get", []> {
+ let summary = "Return a pointer to an `nvgpu.mbarrier`.";
+ let description = [{
+ The `nvgpu.mbarrier.get` operation retrieves a pointer to a specific
+ `mbarrier` object from a group of barriers created by the `nvgpu.mbarrier.create` operation.
+ The pointer is returned as an integer value of type `i32` or `i64`.
+
+ Example:
+ ```mlir
+ %mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
+ %mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10> -> i32
+ ```
+ }];
+ let arguments = (ins NVGPU_MBarrierGroup:$barriers, Index:$mbarId);
+ let results = (outs AnyTypeOf<[I32, I64]>:$mbarrierPointer);
+ let assemblyFormat = [{
+ $barriers `[` $mbarId `]` attr-dict `:` type($barriers) `->` type($mbarrierPointer)
+ }];
+}
+
def NVGPU_MBarrierInitOp : NVGPU_Op<"mbarrier.init", []> {
let summary = "Initialize the `nvgpu.mbarrier`.";
let description = [{
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index f53de416f2abd..31c28a6008a22 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -819,6 +819,24 @@ struct MBarrierBasePattern : public ConvertOpToLLVMPattern<SourceOp> {
}
};
+/// Lowers `nvgpu.mbarrier.get` to an `llvm.ptrtoint` of the selected
+/// mbarrier's pointer.
+struct NVGPUMBarrierGetLowering
+ : public MBarrierBasePattern<nvgpu::MBarrierGetOp> {
+ using MBarrierBasePattern<nvgpu::MBarrierGetOp>::MBarrierBasePattern;
+
+ LogicalResult
+ matchAndRewrite(nvgpu::MBarrierGetOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ ImplicitLocOpBuilder b(op->getLoc(), rewriter);
+ nvgpu::MBarrierGroupType mbarrierType = op.getBarriers().getType();
+ rewriter.setInsertionPoint(op);
+ // Compute the pointer to the barrier selected by `mbarId`. `getMbarrierPtr`
+ // is not defined in this hunk; presumably it is inherited from
+ // MBarrierBasePattern -- confirm against the full file.
+ Value barrier = getMbarrierPtr(b, mbarrierType, adaptor.getBarriers(),
+ adaptor.getMbarId(), rewriter);
+ // The integer width of the result is whatever the op's result type
+ // declares (the op definition allows i32 or i64).
+ Type resType = op.getMbarrierPointer().getType();
+ rewriter.replaceOpWithNewOp<LLVM::PtrToIntOp>(op, resType, barrier);
+ return success();
+ }
+};
+
/// Lowers `nvgpu.mbarrier.init` to `nvvm.mbarrier.init`
struct NVGPUMBarrierInitLowering
: public MBarrierBasePattern<nvgpu::MBarrierInitOp> {
@@ -1706,6 +1724,7 @@ void mlir::populateNVGPUToNVVMConversionPatterns(
patterns.add<
NVGPUMBarrierCreateLowering, // nvgpu.mbarrier.create
NVGPUMBarrierInitLowering, // nvgpu.mbarrier.init
+ NVGPUMBarrierGetLowering, // nvgpu.mbarrier.get
NVGPUMBarrierArriveLowering, // nvgpu.mbarrier.arrive
NVGPUMBarrierArriveNoCompleteLowering, // nvgpu.mbarrier.arrive.no_complete
NVGPUMBarrierTestWaitLowering, // nvgpu.mbarrier.test_wait_parity
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 6b59b5e4343b4..cb4dd7d3b2961 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -532,6 +532,28 @@ func.func @mbarrier_nocomplete() {
func.return
}
+// Verifies the lowering of `nvgpu.mbarrier.get`: the selected barrier is
+// addressed with `llvm.extractvalue` + `llvm.getelementptr`, and the pointer
+// is converted with `llvm.ptrtoint` to the requested integer type.
+// CHECK-LABEL: func @mbarrier_get
+// CHECK-SAME: %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}
+func.func @mbarrier_get(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>) {
+ // First case: barrier index 2, pointer returned as i32.
+ // CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[ARG0]] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+ // CHECK: %[[c2:.+]] = arith.constant 2 : index
+ // CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[c2]] : index to i64
+ // CHECK: %[[S2:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+ // CHECK: %[[S3:.+]] = llvm.getelementptr %[[S2]][%[[S1]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
+ // CHECK: %[[S4:.+]] = llvm.ptrtoint %[[S3]] : !llvm.ptr<3> to i32
+ %c2 = arith.constant 2 : index
+ nvgpu.mbarrier.get %barriers[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i32
+
+ // Second case: barrier index 4, pointer returned as i64. The converted
+ // group value (S0) from the first case is reused.
+ // CHECK: %[[c4:.+]] = arith.constant 4 : index
+ // CHECK: %[[S5:.+]] = builtin.unrealized_conversion_cast %[[c4]] : index to i64
+ // CHECK: %[[S6:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+ // CHECK: %[[S7:.+]] = llvm.getelementptr %[[S6]][%[[S5]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
+ // CHECK: %[[S8:.+]] = llvm.ptrtoint %[[S7]] : !llvm.ptr<3> to i64
+ %c4 = arith.constant 4 : index
+ nvgpu.mbarrier.get %barriers[%c4] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i64
+ func.return
+}
+
// CHECK-LABEL: func @mbarrier_wait(
// CHECK-SAME: %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}, %[[ARG1:.*]]: !nvgpu.mbarrier.token)
func.func @mbarrier_wait(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>, %token : !tokenType) {
More information about the Mlir-commits
mailing list