[Mlir-commits] [mlir] [MLIR][NVGPU] Add mbarrier.get Op (PR #133221)

Guray Ozen llvmlistbot at llvm.org
Thu Mar 27 01:57:09 PDT 2025


https://github.com/grypp created https://github.com/llvm/llvm-project/pull/133221

The `mbarrier.create` op can create multiple mbarrier objects, and other mbarrier-related ops can access an mbarrier using a dynamic SSA value. This is especially useful when using mbarriers in dynamic loops.

This PR adds the `mbarrier.get` op, which returns a pointer to a specific mbarrier object from a group of barriers created by the nvgpu.mbarrier.create operation. It is useful when composing the NVGPU and NVVM dialects.

Example:
```
%mbars = nvgpu.mbarrier.create 
   -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>

%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] 
  : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
  -> i32
 ```

>From b552b37bfc025c006a27b7b28b5ffc22bbf39e3b Mon Sep 17 00:00:00 2001
From: Guray Ozen <gozen at nvidia.com>
Date: Thu, 27 Mar 2025 09:54:41 +0100
Subject: [PATCH] [MLIR][NVGPU] Add mbarrier.get Op

The `mbarrier.create` op can create multiple mbarrier objects, and other mbarrier-related ops can access an mbarrier using a dynamic SSA value. This is especially useful when using mbarriers in dynamic loops.

This PR adds the `mbarrier.get` op, which returns a pointer to a specific mbarrier object from a group of barriers created by the nvgpu.mbarrier.create operation. It is useful when composing the NVGPU and NVVM dialects.

Example:
```
%mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>

%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10> -> i32
 ```
---
 .../include/mlir/Dialect/NVGPU/IR/NVGPUOps.td | 19 ++++++++++++++++
 .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp    | 19 ++++++++++++++++
 .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 22 +++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
index eb0fb90d271ed..ec68364d47e4f 100644
--- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
+++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td
@@ -322,6 +322,25 @@ def NVGPU_MBarrierCreateOp : NVGPU_Op<"mbarrier.create", []> {
   }];
 }
 
+def NVGPU_MBarrierGetOp : NVGPU_Op<"mbarrier.get", []> {
+  let summary = "Return a pointer to an `nvgpu.mbarrier`.";
+  let description = [{
+    The `nvgpu.mbarrier.get` operation retrieves a pointer to a specific 
+    `mbarrier` object from a group of barriers created by the `nvgpu.mbarrier.create` operation.
+
+    Example:
+    ```mlir
+      %mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
+      %mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10> -> i32
+    ```
+  }];
+  let arguments = (ins NVGPU_MBarrierGroup:$barriers, Index:$mbarId);
+  let results = (outs AnyTypeOf<[I32, I64]>:$mbarrierPointer);
+  let assemblyFormat = [{
+    $barriers `[` $mbarId `]` attr-dict `:` type($barriers) `->` type($mbarrierPointer)
+  }];
+}
+
 def NVGPU_MBarrierInitOp : NVGPU_Op<"mbarrier.init", []> {
   let summary = "Initialize the `nvgpu.mbarrier`.";
   let description = [{
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index f53de416f2abd..31c28a6008a22 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -819,6 +819,24 @@ struct MBarrierBasePattern : public ConvertOpToLLVMPattern<SourceOp> {
   }
 };
 
+struct NVGPUMBarrierGetLowering
+    : public MBarrierBasePattern<nvgpu::MBarrierGetOp> {
+  using MBarrierBasePattern<nvgpu::MBarrierGetOp>::MBarrierBasePattern;
+
+  LogicalResult
+  matchAndRewrite(nvgpu::MBarrierGetOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    ImplicitLocOpBuilder b(op->getLoc(), rewriter);
+    nvgpu::MBarrierGroupType mbarrierType = op.getBarriers().getType();
+    rewriter.setInsertionPoint(op);
+    Value barrier = getMbarrierPtr(b, mbarrierType, adaptor.getBarriers(),
+                                   adaptor.getMbarId(), rewriter);
+    Type resType = op.getMbarrierPointer().getType();
+    rewriter.replaceOpWithNewOp<LLVM::PtrToIntOp>(op, resType, barrier);
+    return success();
+  }
+};
+
 /// Lowers `nvgpu.mbarrier.init` to `nvvm.mbarrier.init`
 struct NVGPUMBarrierInitLowering
     : public MBarrierBasePattern<nvgpu::MBarrierInitOp> {
@@ -1706,6 +1724,7 @@ void mlir::populateNVGPUToNVVMConversionPatterns(
   patterns.add<
       NVGPUMBarrierCreateLowering,           // nvgpu.mbarrier.create
       NVGPUMBarrierInitLowering,             // nvgpu.mbarrier.init
+      NVGPUMBarrierGetLowering,              // nvgpu.mbarrier.get
       NVGPUMBarrierArriveLowering,           // nvgpu.mbarrier.arrive
       NVGPUMBarrierArriveNoCompleteLowering, // nvgpu.mbarrier.arrive.no_complete
       NVGPUMBarrierTestWaitLowering,         // nvgpu.mbarrier.test_wait_parity
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 6b59b5e4343b4..cb4dd7d3b2961 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -532,6 +532,28 @@ func.func @mbarrier_nocomplete() {
   func.return
 }
 
+// CHECK-LABEL: func @mbarrier_get
+//  CHECK-SAME:     %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}
+func.func @mbarrier_get(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>) {
+  // CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[ARG0]] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[c2:.+]] = arith.constant 2 : index
+  // CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[c2]] : index to i64
+  // CHECK: %[[S2:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> 
+  // CHECK: %[[S3:.+]] = llvm.getelementptr %[[S2]][%[[S1]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
+  // CHECK: %[[S4:.+]] = llvm.ptrtoint %[[S3]] : !llvm.ptr<3> to i32
+  %c2 = arith.constant 2 : index
+  nvgpu.mbarrier.get %barriers[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i32
+
+  // CHECK: %[[c4:.+]] = arith.constant 4 : index
+  // CHECK: %[[S5:.+]] = builtin.unrealized_conversion_cast %[[c4]] : index to i64
+  // CHECK: %[[S6:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> 
+  // CHECK: %[[S7:.+]] = llvm.getelementptr %[[S6]][%[[S5]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
+  // CHECK: %[[S8:.+]] = llvm.ptrtoint %[[S7]] : !llvm.ptr<3> to i64
+  %c4 = arith.constant 4 : index
+  nvgpu.mbarrier.get %barriers[%c4] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i64
+  func.return
+}
+
 // CHECK-LABEL: func @mbarrier_wait(
 //  CHECK-SAME:     %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}, %[[ARG1:.*]]: !nvgpu.mbarrier.token)
 func.func @mbarrier_wait(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>, %token : !tokenType) {



More information about the Mlir-commits mailing list