[Mlir-commits] [mlir] [mlir][NVVM] Add support for barrier0-reduction operation (PR #167036)
Valentin Clement バレンタイン クレメン
llvmlistbot at llvm.org
Mon Nov 10 08:46:00 PST 2025
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/167036
>From a6a0b48a664ced22e45089dd509e218dbf3e86eb Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 14:43:32 -0800
Subject: [PATCH 1/7] [mlir][NVVM] Add support for barrier0 operation with
predicate
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 49 +++++++++++++++++++
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 14 ++++++
mlir/test/Target/LLVMIR/nvvmir.mlir | 7 ---
3 files changed, 63 insertions(+), 7 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1cc5b74a3cb67..0921272b538bc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,6 +977,55 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}];
}
+// Attrs describing the predicate of barrier0 operation.
+def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
+def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def Barrier0Pred
+ : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
+ [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::NVVM";
+}
+def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
+ Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
+ Results<(outs I32:$res)> {
+ let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
+ let description = [{
+ The `nvvm.barrier0` operation is a convenience operation that performs
+ barrier synchronization and communication within a CTA
+ (Cooperative Thread Array) using barrier ID 0. It is functionally
+ equivalent to `nvvm.barrier` or `nvvm.barrier id=0`.
+
+ `popc` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns the number of
+ threads for which predicate evaluates to non-zero.
+
+ `and` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns non-zero if
+ and only if predicate evaluates to non-zero for all of them.
+
+ `or` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns non-zero if and
+ only if predicate evaluates to non-zero for any of them.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
+ }];
+
+ let assemblyFormat =
+ " ($value^ `:` type($value))? ($pred^)? attr-dict `->` type($res)";
+ string llvmBuilder = [{
+ createIntrinsicCall(
+ builder, getBarrier0IntrinsicID($pred),
+ {$value ? $value : builder.getInt32(0)});
+ }];
+}
+
def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
let summary = "CTA Barrier Synchronization Op";
let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cecff51e637a5..f23758dbb5439 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
llvm_unreachable("Unsupported proxy kinds");
}
+static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
+ if (!pred)
+ return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
+ switch (*pred) {
+ case NVVM::Barrier0Pred::AND:
+ return llvm::Intrinsic::nvvm_barrier0_and;
+ case NVVM::Barrier0Pred::OR:
+ return llvm::Intrinsic::nvvm_barrier0_or;
+ case NVVM::Barrier0Pred::POPC:
+ return llvm::Intrinsic::nvvm_barrier0_popc;
+ }
+ llvm_unreachable("Unknown predicate for barrier0");
+}
+
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
switch (scope) {
case NVVM::MemScopeKind::CTA:
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 1ec55408e97a5..9929882a033de 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,13 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
llvm.return %1 : f32
}
-// CHECK-LABEL: @llvm_nvvm_barrier0
-llvm.func @llvm_nvvm_barrier0() {
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
- nvvm.barrier0
- llvm.return
-}
-
// CHECK-LABEL: @llvm_nvvm_barrier(
// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
>From e133a3f9b0c5e06b0491c5f55da8d4ddddb922c7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:08:14 -0800
Subject: [PATCH 2/7] Fix assembly format
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0921272b538bc..530cc316522fe 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1018,11 +1018,10 @@ def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
}];
let assemblyFormat =
- " ($value^ `:` type($value))? ($pred^)? attr-dict `->` type($res)";
+ "$value `:` type($value) $pred attr-dict `->` type($res)";
string llvmBuilder = [{
createIntrinsicCall(
- builder, getBarrier0IntrinsicID($pred),
- {$value ? $value : builder.getInt32(0)});
+ builder, getBarrier0IntrinsicID($pred), {$value});
}];
}
>From 6e668ae71ee1e00302dae07c693c79730659e0c7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:08:34 -0800
Subject: [PATCH 3/7] Add test file
---
mlir/test/Target/LLVMIR/nvvm/barrier0.mlir | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
new file mode 100644
index 0000000000000..1e999c6526b11
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @llvm_nvvm_barrier0(
+// CHECK-SAME: i32 %[[VALUE:.*]])
+llvm.func @llvm_nvvm_barrier0(%c : i32) {
+ // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+ nvvm.barrier0
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[VALUE]])
+ %0 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<and> -> i32
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[VALUE]])
+ %1 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<or> -> i32
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[VALUE]])
+ %2 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<popc> -> i32
+ llvm.return
+}
>From 2a91932675700b8a8c363ecb1d6776c09eb35643 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Nov 2025 15:10:14 -0800
Subject: [PATCH 4/7] Remove optional from getBarrier0IntrinsicID
---
.../Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index f23758dbb5439..45b144b333903 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,10 +291,8 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
llvm_unreachable("Unsupported proxy kinds");
}
-static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
- if (!pred)
- return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
- switch (*pred) {
+static unsigned getBarrier0IntrinsicID(NVVM::Barrier0Pred pred) {
+ switch (pred) {
case NVVM::Barrier0Pred::AND:
return llvm::Intrinsic::nvvm_barrier0_and;
case NVVM::Barrier0Pred::OR:
>From f7ce92f5650eeb92a2e2b7a7447d04b165accb94 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Sun, 9 Nov 2025 08:39:39 -0800
Subject: [PATCH 5/7] Merge with nvvm.barrier
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 81 +++++++------------
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 9 +++
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 22 ++---
mlir/test/Target/LLVMIR/nvvm/barrier.mlir | 20 +++++
mlir/test/Target/LLVMIR/nvvm/barrier0.mlir | 15 ----
mlir/test/Target/LLVMIR/nvvmir.mlir | 12 ---
6 files changed, 70 insertions(+), 89 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier.mlir
delete mode 100644 mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 530cc316522fe..e6a7e7a04ce85 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,54 +977,23 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}];
}
-// Attrs describing the predicate of barrier0 operation.
-def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
-def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
-def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
-
-def Barrier0Pred
- : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
- [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+// Attrs describing the reduction operations for the barrier operation.
+def BarrierReductionPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def BarrierReductionAnd : I32EnumAttrCase<"AND", 1, "and">;
+def BarrierReductionOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def BarrierReduction
+ : I32EnumAttr<"BarrierReduction", "NVVM barrier reduction operation",
+ [BarrierReductionPopc, BarrierReductionAnd,
+ BarrierReductionOr]> {
let genSpecializedAttr = 0;
let cppNamespace = "::mlir::NVVM";
}
-def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+def BarrierReductionAttr
+ : EnumAttr<NVVM_Dialect, BarrierReduction, "reduction"> {
let assemblyFormat = "`<` $value `>`";
}
-def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
- Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
- Results<(outs I32:$res)> {
- let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
- let description = [{
- The `nvvm.barrier0` operation is a convenience operation that performs
- barrier synchronization and communication within a CTA
- (Cooperative Thread Array) using barrier ID 0. It is functionally
- equivalent to `nvvm.barrier` or `nvvm.barrier id=0`.
-
- `popc` is identical to `nvvm.barrier0` with the additional feature that it
- evaluates predicate for all threads of the block and returns the number of
- threads for which predicate evaluates to non-zero.
-
- `and` is identical to `nvvm.barrier0` with the additional feature that it
- evaluates predicate for all threads of the block and returns non-zero if
- and only if predicate evaluates to non-zero for all of them.
-
- `or` is identical to `nvvm.barrier0` with the additional feature that it
- evaluates predicate for all threads of the block and returns non-zero if and
- only if predicate evaluates to non-zero for any of them.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
- }];
-
- let assemblyFormat =
- "$value `:` type($value) $pred attr-dict `->` type($res)";
- string llvmBuilder = [{
- createIntrinsicCall(
- builder, getBarrier0IntrinsicID($pred), {$value});
- }];
-}
-
def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
let summary = "CTA Barrier Synchronization Op";
let description = [{
@@ -1039,6 +1008,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
- `numberOfThreads`: Specifies the number of threads participating in the barrier.
When specified, the value must be a multiple of the warp size. If not specified,
all threads in the CTA participate in the barrier.
+ - `reductionOp`
The barrier operation guarantees that when the barrier completes, prior memory
accesses requested by participating threads are performed relative to all threads
@@ -1055,31 +1025,36 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
}];
- let arguments = (ins
- Optional<I32>:$barrierId,
- Optional<I32>:$numberOfThreads);
+ let arguments = (ins Optional<I32>:$barrierId, Optional<I32>:$numberOfThreads,
+ OptionalAttr<BarrierReductionAttr>:$reductionOp,
+ Optional<I32>:$reductionPredicate);
string llvmBuilder = [{
llvm::Value *id = $barrierId ? $barrierId : builder.getInt32(0);
if ($numberOfThreads)
createIntrinsicCall(
builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count,
{id, $numberOfThreads});
+ else if ($reductionOp)
+ createIntrinsicCall(
+ builder, getBarrierIntrinsicID($reductionOp), {$reductionPredicate});
else
createIntrinsicCall(
builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all, {id});
}];
+ let results = (outs Optional<I32>:$res);
+
let hasVerifier = 1;
- let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict";
+ let assemblyFormat =
+ "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? "
+ "($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict";
- let builders = [
- OpBuilder<(ins), [{
- return build($_builder, $_state, Value{}, Value{});
+ let builders = [OpBuilder<(ins), [{
+ return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{});
}]>,
- OpBuilder<(ins "Value":$barrierId), [{
- return build($_builder, $_state, barrierId, Value{});
- }]>
- ];
+ OpBuilder<(ins "Value":$barrierId), [{
+ return build($_builder, $_state, TypeRange{}, barrierId, Value{}, {}, Value{});
+ }]>];
}
def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive">
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index d43f8815be16d..67080948255f4 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1504,6 +1504,15 @@ LogicalResult NVVM::BarrierOp::verify() {
if (getNumberOfThreads() && !getBarrierId())
return emitOpError(
"barrier id is missing, it should be set between 0 to 15");
+
+ if (getBarrierId() && (getReductionOp() || getReductionPredicate()))
+ return emitOpError("reduction are only available for barrier id 0");
+
+ if ((getReductionOp() && !getReductionPredicate()) ||
+ (!getReductionOp() && getReductionPredicate()))
+ return emitOpError("reduction predicate and reduction operation must be "
+ "specified together");
+
return success();
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 45b144b333903..7a3633a0646e7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,16 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
llvm_unreachable("Unsupported proxy kinds");
}
-static unsigned getBarrier0IntrinsicID(NVVM::Barrier0Pred pred) {
- switch (pred) {
- case NVVM::Barrier0Pred::AND:
- return llvm::Intrinsic::nvvm_barrier0_and;
- case NVVM::Barrier0Pred::OR:
- return llvm::Intrinsic::nvvm_barrier0_or;
- case NVVM::Barrier0Pred::POPC:
- return llvm::Intrinsic::nvvm_barrier0_popc;
+static unsigned
+getBarrierIntrinsicID(std::optional<NVVM::BarrierReduction> reduction) {
+ if (reduction) {
+ switch (*reduction) {
+ case NVVM::BarrierReduction::AND:
+ return llvm::Intrinsic::nvvm_barrier0_and;
+ case NVVM::BarrierReduction::OR:
+ return llvm::Intrinsic::nvvm_barrier0_or;
+ case NVVM::BarrierReduction::POPC:
+ return llvm::Intrinsic::nvvm_barrier0_popc;
+ }
}
- llvm_unreachable("Unknown predicate for barrier0");
+
+ llvm_unreachable("Unknown reduction operation for barrier");
}
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
new file mode 100644
index 0000000000000..4d165ac037411
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir
@@ -0,0 +1,20 @@
+// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @llvm_nvvm_barrier(
+// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[predicate:.*]])
+llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %predicate : i32) {
+ // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+ nvvm.barrier
+ // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
+ nvvm.barrier id = %barID
+ // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
+ nvvm.barrier id = %barID number_of_threads = %numberOfThreads
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[predicate]])
+ %0 = nvvm.barrier #nvvm.reduction<and> %predicate -> i32
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[predicate]])
+ %1 = nvvm.barrier #nvvm.reduction<or> %predicate -> i32
+ // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[predicate]])
+ %2 = nvvm.barrier #nvvm.reduction<popc> %predicate -> i32
+
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
deleted file mode 100644
index 1e999c6526b11..0000000000000
--- a/mlir/test/Target/LLVMIR/nvvm/barrier0.mlir
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
-
-// CHECK-LABEL: @llvm_nvvm_barrier0(
-// CHECK-SAME: i32 %[[VALUE:.*]])
-llvm.func @llvm_nvvm_barrier0(%c : i32) {
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
- nvvm.barrier0
- // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[VALUE]])
- %0 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<and> -> i32
- // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[VALUE]])
- %1 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<or> -> i32
- // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[VALUE]])
- %2 = nvvm.barrier0.pred %c : i32 #nvvm.barrier0_pred<popc> -> i32
- llvm.return
-}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 9929882a033de..fb8b1d33e9060 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,18 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
llvm.return %1 : f32
}
-// CHECK-LABEL: @llvm_nvvm_barrier(
-// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
-llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
- nvvm.barrier
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
- nvvm.barrier id = %barID
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
- nvvm.barrier id = %barID number_of_threads = %numberOfThreads
- llvm.return
-}
-
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
llvm.func @llvm_nvvm_cluster_arrive() {
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
>From d29a8bdf07bc7b1d09594613aca2309e06817553 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 10 Nov 2025 08:40:49 -0800
Subject: [PATCH 6/7] Add doc
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index e6a7e7a04ce85..cc5c2d7cd9370 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1008,7 +1008,9 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
- `numberOfThreads`: Specifies the number of threads participating in the barrier.
When specified, the value must be a multiple of the warp size. If not specified,
all threads in the CTA participate in the barrier.
- - `reductionOp`
+ - `reductionOp`: specifies the reduction operation (`popc`, `and`, `or`).
+ - `reductionPredicate`: specifies the predicate to be used with the
+ `reductionOp`.
The barrier operation guarantees that when the barrier completes, prior memory
accesses requested by participating threads are performed relative to all threads
>From e1df091796cc89e61c42fc93534e4253ad5cf30d Mon Sep 17 00:00:00 2001
From: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
Date: Mon, 10 Nov 2025 08:45:49 -0800
Subject: [PATCH 7/7] Apply suggestion
Co-authored-by: Guray Ozen <guray.ozen at gmail.com>
---
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 67080948255f4..ff9820af64f59 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1506,7 +1506,7 @@ LogicalResult NVVM::BarrierOp::verify() {
"barrier id is missing, it should be set between 0 to 15");
if (getBarrierId() && (getReductionOp() || getReductionPredicate()))
- return emitOpError("reduction are only available for barrier id 0");
+ return emitOpError("reduction are only available when id is 0");
if ((getReductionOp() && !getReductionPredicate()) ||
(!getReductionOp() && getReductionPredicate()))
More information about the Mlir-commits
mailing list