[Mlir-commits] [mlir] [mlir][NVVM] Add support for barrier0-reduction operation (PR #167036)

Valentin Clement バレンタイン クレメン llvmlistbot at llvm.org
Mon Nov 10 10:00:50 PST 2025


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/167036

>From a6a0b48a664ced22e45089dd509e218dbf3e86eb Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 14:43:32 -0800
Subject: [PATCH 1/9] [mlir][NVVM] Add support for barrier0 operation with
 predicate

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td   | 49 +++++++++++++++++++
 .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp  | 14 ++++++
 mlir/test/Target/LLVMIR/nvvmir.mlir           |  7 ---
 3 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1cc5b74a3cb67..0921272b538bc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,6 +977,55 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   }];
 }
 
+// Attrs describing the predicate of barrier0 operation.
+def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
+def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def Barrier0Pred
+    : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
+                  [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
+                          Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
+                          Results<(outs I32:$res)> {
+  let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
+  let description = [{
+    The `nvvm.barrier0` operation is a convenience operation that performs
+    barrier synchronization and communication within a CTA
+    (Cooperative Thread Array) using barrier ID 0. It is functionally
+    equivalent to `nvvm.barrier` or `nvvm.barrier id=0`.
+
+    `popc` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns the number of
+    threads for which predicate evaluates to non-zero.
+
+    `and` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns non-zero if
+    and only if predicate evaluates to non-zero for all of them.
+
+    `or` is identical to `nvvm.barrier0` with the additional feature that it
+    evaluates predicate for all threads of the block and returns non-zero if and
+    only if predicate evaluates to non-zero for any of them.
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
+  }];
+
+  let assemblyFormat =
+      " ($value^ `:` type($value))? ($pred^)? attr-dict `->` type($res)";
+  string llvmBuilder = [{
+      createIntrinsicCall(
+          builder, getBarrier0IntrinsicID($pred),
+          {$value ? $value : builder.getInt32(0)});
+  }];
+}
+
 def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
   let summary = "CTA Barrier Synchronization Op";
   let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cecff51e637a5..f23758dbb5439 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
   llvm_unreachable("Unsupported proxy kinds");
 }
 
+static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
+  if (!pred)
+    return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
+  switch (*pred) {
+  case NVVM::Barrier0Pred::AND:
+    return llvm::Intrinsic::nvvm_barrier0_and;
+  case NVVM::Barrier0Pred::OR:
+    return llvm::Intrinsic::nvvm_barrier0_or;
+  case NVVM::Barrier0Pred::POPC:
+    return llvm::Intrinsic::nvvm_barrier0_popc;
+  }
+  llvm_unreachable("Unknown predicate for barrier0");
+}
+
 static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
   switch (scope) {
   case NVVM::MemScopeKind::CTA:
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 1ec55408e97a5..9929882a033de 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,13 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
   llvm.return %1 : f32
 }
 
-// CHECK-LABEL: @llvm_nvvm_barrier0
-llvm.func @llvm_nvvm_barrier0() {
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
-  nvvm.barrier0
-  llvm.return
-}
-
 // CHECK-LABEL: @llvm_nvvm_barrier(
 // CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
 llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {

>From e133a3f9b0c5e06b0491c5f55da8d4ddddb922c7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:08:14 -0800
Subject: [PATCH 2/9] Fix assembly format

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0921272b538bc..530cc316522fe 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1018,11 +1018,10 @@ def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
   }];
 
   let assemblyFormat =
-      " ($value^ `:` type($value))? ($pred^)? attr-dict `->` type($res)";
+      "$value `:` type($value) $pred attr-dict `->` type($res)";
   string llvmBuilder = [{
       createIntrinsicCall(
-          builder, getBarrier0IntrinsicID($pred),
-          {$value ? $value : builder.getInt32(0)});
+          builder, getBarrier0IntrinsicID($pred), {$value});
   }];
 }
 

>From 6e668ae71ee1e00302dae07c693c79730659e0c7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:08:34 -0800
Subject: [PATCH 3/9] Add test file

---
 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir

diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
new file mode 100644
index 0000000000000..1e999c6526b11
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @llvm_nvvm_barrier0(
+// CHECK-SAME: i32 %[[VALUE:.*]])
+llvm.func @llvm_nvvm_barrier0(%c : i32) {
+  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  nvvm.barrier0 
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[VALUE]])
+  %0 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<and> -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[VALUE]])
+  %1 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<or> -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[VALUE]])
+  %2 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<popc> -> i32
+  llvm.return
+}

>From 2a91932675700b8a8c363ecb1d6776c09eb35643 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:10:14 -0800
Subject: [PATCH 4/9] Remove optional from getBarrier0IntrinsicID

---
 .../Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp  | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index f23758dbb5439..45b144b333903 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,10 +291,8 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
   llvm_unreachable("Unsupported proxy kinds");
 }
 
-static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
-  if (!pred)
-    return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
-  switch (*pred) {
+static unsigned getBarrier0IntrinsicID(NVVM::Barrier0Pred pred) {
+  switch (pred) {
   case NVVM::Barrier0Pred::AND:
     return llvm::Intrinsic::nvvm_barrier0_and;
   case NVVM::Barrier0Pred::OR:

>From f7ce92f5650eeb92a2e2b7a7447d04b165accb94 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Sun, 9 Nov 2025 08:39:39 -0800
Subject: [PATCH 5/9] Merge with nvvm.barrier

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td   | 81 +++++++------------
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp    |  9 +++
 .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp  | 22 ++---
 mlir/test/Target/LLVMIR/nvvm/barrier.mlir     | 20 +++++
 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir    | 15 ----
 mlir/test/Target/LLVMIR/nvvmir.mlir           | 12 ---
 6 files changed, 70 insertions(+), 89 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 530cc316522fe..e6a7e7a04ce85 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,54 +977,23 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
   }];
 }
 
-// Attrs describing the predicate of barrier0 operation.
-def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
-def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
-def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
-
-def Barrier0Pred
-    : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
-                  [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+// Attrs describing the reduction operations for the barrier operation.
+def BarrierReductionPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def BarrierReductionAnd : I32EnumAttrCase<"AND", 1, "and">;
+def BarrierReductionOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def BarrierReduction
+    : I32EnumAttr<"BarrierReduction", "NVVM barrier reduction operation",
+                  [BarrierReductionPopc, BarrierReductionAnd,
+                   BarrierReductionOr]> {
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::NVVM";
 }
-def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+def BarrierReductionAttr
+    : EnumAttr<NVVM_Dialect, BarrierReduction, "reduction"> {
   let assemblyFormat = "`<` $value `>`";
 }
 
-def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
-                          Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
-                          Results<(outs I32:$res)> {
-  let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
-  let description = [{
-    The `nvvm.barrier0` operation is a convenience operation that performs
-    barrier synchronization and communication within a CTA
-    (Cooperative Thread Array) using barrier ID 0. It is functionally
-    equivalent to `nvvm.barrier` or `nvvm.barrier id=0`.
-
-    `popc` is identical to `nvvm.barrier0` with the additional feature that it
-    evaluates predicate for all threads of the block and returns the number of
-    threads for which predicate evaluates to non-zero.
-
-    `and` is identical to `nvvm.barrier0` with the additional feature that it
-    evaluates predicate for all threads of the block and returns non-zero if
-    and only if predicate evaluates to non-zero for all of them.
-
-    `or` is identical to `nvvm.barrier0` with the additional feature that it
-    evaluates predicate for all threads of the block and returns non-zero if and
-    only if predicate evaluates to non-zero for any of them.
-
-    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
-  }];
-
-  let assemblyFormat =
-      "$value `:` type($value) $pred attr-dict `->` type($res)";
-  string llvmBuilder = [{
-      createIntrinsicCall(
-          builder, getBarrier0IntrinsicID($pred), {$value});
-  }];
-}
-
 def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
   let summary = "CTA Barrier Synchronization Op";
   let description = [{
@@ -1039,6 +1008,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
     - `numberOfThreads`: Specifies the number of threads participating in the barrier. 
       When specified, the value must be a multiple of the warp size. If not specified, 
       all threads in the CTA participate in the barrier.
+    - `reductionOp`
 
     The barrier operation guarantees that when the barrier completes, prior memory 
     accesses requested by participating threads are performed relative to all threads 
@@ -1055,31 +1025,36 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
   }];
 
-  let arguments = (ins     
-    Optional<I32>:$barrierId,
-    Optional<I32>:$numberOfThreads);
+  let arguments = (ins Optional<I32>:$barrierId, Optional<I32>:$numberOfThreads,
+      OptionalAttr<BarrierReductionAttr>:$reductionOp,
+      Optional<I32>:$reductionPredicate);
   string llvmBuilder = [{
     llvm::Value *id = $barrierId ? $barrierId : builder.getInt32(0);
     if ($numberOfThreads)
       createIntrinsicCall(
           builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count,
           {id, $numberOfThreads});
+    else if ($reductionOp)
+      createIntrinsicCall(
+          builder, getBarrierIntrinsicID($reductionOp), {$reductionPredicate});
     else
       createIntrinsicCall(
           builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all, {id});
   }];
+  let results = (outs Optional<I32>:$res);
+
   let hasVerifier = 1;
 
-  let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+  let assemblyFormat =
+      "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? "
+      "($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict";
 
-  let builders = [
-    OpBuilder<(ins), [{
-      return build($_builder, $_state, Value{}, Value{});
+  let builders = [OpBuilder<(ins), [{
+      return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{});
     }]>,
-    OpBuilder<(ins "Value":$barrierId), [{
-      return build($_builder, $_state, barrierId, Value{});
-    }]>
-  ];
+                  OpBuilder<(ins "Value":$barrierId), [{
+      return build($_builder, $_state, TypeRange{}, barrierId, Value{}, {}, Value{});
+    }]>];
 }
 
 def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive"> 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index d43f8815be16d..67080948255f4 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1504,6 +1504,15 @@ LogicalResult NVVM::BarrierOp::verify() {
   if (getNumberOfThreads() && !getBarrierId())
     return emitOpError(
         "barrier id is missing, it should be set between 0 to 15");
+
+  if (getBarrierId() && (getReductionOp() || getReductionPredicate()))
+    return emitOpError("reduction are only available for barrier id 0");
+
+  if ((getReductionOp() && !getReductionPredicate()) ||
+      (!getReductionOp() && getReductionPredicate()))
+    return emitOpError("reduction predicate and reduction operation must be "
+                       "specified together");
+
   return success();
 }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 45b144b333903..7a3633a0646e7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,16 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
   llvm_unreachable("Unsupported proxy kinds");
 }
 
-static unsigned getBarrier0IntrinsicID(NVVM::Barrier0Pred pred) {
-  switch (pred) {
-  case NVVM::Barrier0Pred::AND:
-    return llvm::Intrinsic::nvvm_barrier0_and;
-  case NVVM::Barrier0Pred::OR:
-    return llvm::Intrinsic::nvvm_barrier0_or;
-  case NVVM::Barrier0Pred::POPC:
-    return llvm::Intrinsic::nvvm_barrier0_popc;
+static unsigned
+getBarrierIntrinsicID(std::optional<NVVM::BarrierReduction> reduction) {
+  if (reduction) {
+    switch (*reduction) {
+    case NVVM::BarrierReduction::AND:
+      return llvm::Intrinsic::nvvm_barrier0_and;
+    case NVVM::BarrierReduction::OR:
+      return llvm::Intrinsic::nvvm_barrier0_or;
+    case NVVM::BarrierReduction::POPC:
+      return llvm::Intrinsic::nvvm_barrier0_popc;
+    }
   }
-  llvm_unreachable("Unknown predicate for barrier0");
+
+  llvm_unreachable("Unknown reduction operation for barrier");
 }
 
 static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
new file mode 100644
index 0000000000000..4d165ac037411
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
@@ -0,0 +1,20 @@
+// RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[predicate:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %predicate : i32) {
+  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  nvvm.barrier
+  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
+  nvvm.barrier id = %barID
+  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
+  nvvm.barrier id = %barID number_of_threads = %numberOfThreads
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[predicate]])
+  %0 = nvvm.barrier #nvvm.reduction<and> %predicate -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[predicate]])
+  %1 = nvvm.barrier #nvvm.reduction<or> %predicate -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[predicate]])
+  %2 = nvvm.barrier #nvvm.reduction<popc> %predicate -> i32
+
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
deleted file mode 100644
index 1e999c6526b11..0000000000000
--- a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
-
-// CHECK-LABEL: @llvm_nvvm_barrier0(
-// CHECK-SAME: i32 %[[VALUE:.*]])
-llvm.func @llvm_nvvm_barrier0(%c : i32) {
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
-  nvvm.barrier0 
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[VALUE]])
-  %0 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<and> -> i32
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[VALUE]])
-  %1 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<or> -> i32
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[VALUE]])
-  %2 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<popc> -> i32
-  llvm.return
-}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 9929882a033de..fb8b1d33e9060 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,18 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
   llvm.return %1 : f32
 }
 
-// CHECK-LABEL: @llvm_nvvm_barrier(
-// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
-llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
-  nvvm.barrier
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
-  nvvm.barrier id = %barID
-  // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
-  nvvm.barrier id = %barID number_of_threads = %numberOfThreads
-  llvm.return
-}
-
 // CHECK-LABEL: @llvm_nvvm_cluster_arrive
 llvm.func @llvm_nvvm_cluster_arrive() {
   // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()

>From d29a8bdf07bc7b1d09594613aca2309e06817553 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 10 Nov 2025 08:40:49 -0800
Subject: [PATCH 6/9] Add doc

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index e6a7e7a04ce85..cc5c2d7cd9370 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1008,7 +1008,9 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
     - `numberOfThreads`: Specifies the number of threads participating in the barrier. 
       When specified, the value must be a multiple of the warp size. If not specified, 
       all threads in the CTA participate in the barrier.
-    - `reductionOp`
+    - `reductionOp`: specifies the reduction operation (`popc`, `and`, `or`).
+    - `reductionPredicate`: specifies the predicate to be used with the
+      `reductionOp`. 
 
     The barrier operation guarantees that when the barrier completes, prior memory 
     accesses requested by participating threads are performed relative to all threads 

>From e1df091796cc89e61c42fc93534e4253ad5cf30d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Mon, 10 Nov 2025 08:45:49 -0800
Subject: [PATCH 7/9] Apply suggestion

Co-authored-by: Guray Ozen <guray.ozen at gmail.com>
---
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 67080948255f4..ff9820af64f59 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1506,7 +1506,7 @@ LogicalResult NVVM::BarrierOp::verify() {
         "barrier id is missing, it should be set between 0 to 15");
 
   if (getBarrierId() && (getReductionOp() || getReductionPredicate()))
-    return emitOpError("reduction are only available for barrier id 0");
+    return emitOpError("reduction are only available when id is 0");
 
   if ((getReductionOp() && !getReductionPredicate()) ||
       (!getReductionOp() && getReductionPredicate()))

>From 79099d081bcaa66b0449bd98874127b731f59e43 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 10 Nov 2025 09:00:56 -0800
Subject: [PATCH 8/9] Rename reductionPredicate to reductionOperand

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td |  6 +++---
 mlir/test/Target/LLVMIR/nvvm/barrier.mlir   | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index cc5c2d7cd9370..b430b0a277368 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1029,7 +1029,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
 
   let arguments = (ins Optional<I32>:$barrierId, Optional<I32>:$numberOfThreads,
       OptionalAttr<BarrierReductionAttr>:$reductionOp,
-      Optional<I32>:$reductionPredicate);
+      Optional<I32>:$reductionOperand);
   string llvmBuilder = [{
     llvm::Value *id = $barrierId ? $barrierId : builder.getInt32(0);
     if ($numberOfThreads)
@@ -1038,7 +1038,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
           {id, $numberOfThreads});
     else if ($reductionOp)
       createIntrinsicCall(
-          builder, getBarrierIntrinsicID($reductionOp), {$reductionPredicate});
+          builder, getBarrierIntrinsicID($reductionOp), {$reductionOperand});
     else
       createIntrinsicCall(
           builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all, {id});
@@ -1049,7 +1049,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
 
   let assemblyFormat =
       "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? "
-      "($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict";
+      "($reductionOp^ $reductionOperand)? (`->` type($res)^)? attr-dict";
 
   let builders = [OpBuilder<(ins), [{
       return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{});
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
index 4d165ac037411..d89f93101c1fc 100644
--- a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
@@ -1,20 +1,20 @@
 // RUN: mlir-translate -mlir-to-llvmir %s  -split-input-file --verify-diagnostics | FileCheck %s
 
 // CHECK-LABEL: @llvm_nvvm_barrier(
-// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[predicate:.*]])
-llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %predicate : i32) {
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %redOperand : i32) {
   // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
   nvvm.barrier
   // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
   nvvm.barrier id = %barID
   // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
   nvvm.barrier id = %barID number_of_threads = %numberOfThreads
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[predicate]])
-  %0 = nvvm.barrier #nvvm.reduction<and> %predicate -> i32
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[predicate]])
-  %1 = nvvm.barrier #nvvm.reduction<or> %predicate -> i32
-  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[predicate]])
-  %2 = nvvm.barrier #nvvm.reduction<popc> %predicate -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]])
+  %0 = nvvm.barrier #nvvm.reduction<and> %redOperand -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]])
+  %1 = nvvm.barrier #nvvm.reduction<or> %redOperand -> i32
+  // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]])
+  %2 = nvvm.barrier #nvvm.reduction<popc> %redOperand -> i32
 
   llvm.return
 }

>From dd5fe888885c64bff7936ff2d49e4225175dd0c5 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 10 Nov 2025 10:00:08 -0800
Subject: [PATCH 9/9] Add getIntrinsicIDAndArgs function

---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td   | 23 ++++++-----
 mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp    | 41 +++++++++++++++++--
 .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp  | 16 --------
 3 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index b430b0a277368..57cd1a837f5ac 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1027,21 +1027,22 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
   }];
 
+  let extraClassDeclaration = [{
+    static mlir::NVVM::IDArgPair
+      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::IRBuilderBase& builder);
+  }];
+
   let arguments = (ins Optional<I32>:$barrierId, Optional<I32>:$numberOfThreads,
       OptionalAttr<BarrierReductionAttr>:$reductionOp,
       Optional<I32>:$reductionOperand);
   string llvmBuilder = [{
-    llvm::Value *id = $barrierId ? $barrierId : builder.getInt32(0);
-    if ($numberOfThreads)
-      createIntrinsicCall(
-          builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count,
-          {id, $numberOfThreads});
-    else if ($reductionOp)
-      createIntrinsicCall(
-          builder, getBarrierIntrinsicID($reductionOp), {$reductionOperand});
-    else
-      createIntrinsicCall(
-          builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all, {id});
+    auto [id, args] = NVVM::BarrierOp::getIntrinsicIDAndArgs(
+                        *op, moduleTranslation, builder);
+    if ($reductionOp)
+      $res = createIntrinsicCall(builder, id, args);
+    else 
+      createIntrinsicCall(builder, id, args);
   }];
   let results = (outs Optional<I32>:$res);
 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index ff9820af64f59..3e07c9aaacd53 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1505,11 +1505,11 @@ LogicalResult NVVM::BarrierOp::verify() {
     return emitOpError(
         "barrier id is missing, it should be set between 0 to 15");
 
-  if (getBarrierId() && (getReductionOp() || getReductionPredicate()))
+  if (getBarrierId() && (getReductionOp() || getReductionOperand()))
     return emitOpError("reduction are only available when id is 0");
 
-  if ((getReductionOp() && !getReductionPredicate()) ||
-      (!getReductionOp() && getReductionPredicate()))
+  if ((getReductionOp() && !getReductionOperand()) ||
+      (!getReductionOp() && getReductionOperand()))
     return emitOpError("reduction predicate and reduction operation must be "
                        "specified together");
 
@@ -1770,6 +1770,41 @@ static bool isPtrInSharedCTASpace(mlir::Value ptr) {
   return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
 }
 
+mlir::NVVM::IDArgPair NVVM::BarrierOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::BarrierOp>(op);
+  llvm::Value *barrierId = thisOp.getBarrierId()
+                               ? mt.lookupValue(thisOp.getBarrierId())
+                               : builder.getInt32(0);
+  llvm::Intrinsic::ID id;
+  llvm::SmallVector<llvm::Value *> args;
+  if (thisOp.getNumberOfThreads()) {
+    id = llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count;
+    args.push_back(barrierId);
+    args.push_back(mt.lookupValue(thisOp.getNumberOfThreads()));
+  } else if (thisOp.getReductionOp()) {
+    switch (*thisOp.getReductionOp()) {
+    case NVVM::BarrierReduction::AND:
+      id = llvm::Intrinsic::nvvm_barrier0_and;
+      break;
+    case NVVM::BarrierReduction::OR:
+      id = llvm::Intrinsic::nvvm_barrier0_or;
+      break;
+    case NVVM::BarrierReduction::POPC:
+      id = llvm::Intrinsic::nvvm_barrier0_popc;
+      break;
+    default:
+      llvm_unreachable("Unknown reduction operation for barrier");
+    }
+    args.push_back(mt.lookupValue(thisOp.getReductionOperand()));
+  } else {
+    id = llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
+    args.push_back(barrierId);
+  }
+
+  return {id, std::move(args)};
+}
+
 mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
     Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
   auto thisOp = cast<NVVM::MBarrierInitOp>(op);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 7a3633a0646e7..cecff51e637a5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,22 +291,6 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
   llvm_unreachable("Unsupported proxy kinds");
 }
 
-static unsigned
-getBarrierIntrinsicID(std::optional<NVVM::BarrierReduction> reduction) {
-  if (reduction) {
-    switch (*reduction) {
-    case NVVM::BarrierReduction::AND:
-      return llvm::Intrinsic::nvvm_barrier0_and;
-    case NVVM::BarrierReduction::OR:
-      return llvm::Intrinsic::nvvm_barrier0_or;
-    case NVVM::BarrierReduction::POPC:
-      return llvm::Intrinsic::nvvm_barrier0_popc;
-    }
-  }
-
-  llvm_unreachable("Unknown reduction operation for barrier");
-}
-
 static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
   switch (scope) {
   case NVVM::MemScopeKind::CTA:



More information about the Mlir-commits mailing list