[Mlir-commits] [mlir] [mlir][NVVM] Add support for barrier0 operation with predicate (PR #167036)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Nov 7 14:49:46 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Add support for the `nvvm.barrier0.[popc|and|or]` operations. They are added as a separate operation (`nvvm.barrier0.pred`) since `Barrier0Op` has no result.
https://docs.nvidia.com/cuda/nvvm-ir-spec/#barrier-and-memory-fence
This will be used in CUDA Fortran lowering:
https://github.com/llvm/llvm-project/blob/49f55f4991227f3c7a2b8161bbf45c74b7023944/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp#L1081
It could also be used later for CUDA C/C++ with CIR:
https://github.com/llvm/llvm-project/blob/49f55f4991227f3c7a2b8161bbf45c74b7023944/clang/lib/Headers/__clang_cuda_device_functions.h#L524
---
Full diff: https://github.com/llvm/llvm-project/pull/167036.diff
3 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+49)
- (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp (+14)
- (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (-7)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 1cc5b74a3cb67..0921272b538bc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -977,6 +977,55 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}];
}
+// Attrs describing the predicate of barrier0 operation.
+def Barrier0PredPopc : I32EnumAttrCase<"POPC", 0, "popc">;
+def Barrier0PredAnd : I32EnumAttrCase<"AND", 1, "and">;
+def Barrier0PredOr : I32EnumAttrCase<"OR", 2, "or">;
+
+def Barrier0Pred
+ : I32EnumAttr<"Barrier0Pred", "NVVM barrier0 predicate",
+ [Barrier0PredPopc, Barrier0PredAnd, Barrier0PredOr]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::NVVM";
+}
+def Barrier0PredAttr : EnumAttr<NVVM_Dialect, Barrier0Pred, "barrier0_pred"> {
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def NVVM_Barrier0PredOp : NVVM_Op<"barrier0.pred">,
+ Arguments<(ins Barrier0PredAttr:$pred, I32:$value)>,
+ Results<(outs I32:$res)> {
+ let summary = "CTA Barrier Synchronization with predicate (Barrier ID 0)";
+ let description = [{
+ The `nvvm.barrier0.pred` operation performs barrier synchronization and
+ communication within a CTA (Cooperative Thread Array) using barrier ID 0,
+ like `nvvm.barrier0`, and additionally returns a reduction of a per-thread
+ predicate. The reduction kind is selected by the `pred` attribute.
+
+ `popc` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns the number of
+ threads for which predicate evaluates to non-zero.
+
+ `and` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns non-zero if
+ and only if predicate evaluates to non-zero for all of them.
+
+ `or` is identical to `nvvm.barrier0` with the additional feature that it
+ evaluates predicate for all threads of the block and returns non-zero if and
+ only if predicate evaluates to non-zero for any of them.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
+ }];
+
+ let assemblyFormat =
+ " ($value^ `:` type($value))? ($pred^)? attr-dict `->` type($res)";
+ string llvmBuilder = [{
+ createIntrinsicCall(
+ builder, getBarrier0IntrinsicID($pred),
+ {$value ? $value : builder.getInt32(0)});
+ }];
+}
+
def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
let summary = "CTA Barrier Synchronization Op";
let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index cecff51e637a5..f23758dbb5439 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
llvm_unreachable("Unsupported proxy kinds");
}
+static unsigned getBarrier0IntrinsicID(std::optional<NVVM::Barrier0Pred> pred) {
+ if (!pred)
+ return llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all;
+ switch (*pred) {
+ case NVVM::Barrier0Pred::AND:
+ return llvm::Intrinsic::nvvm_barrier0_and;
+ case NVVM::Barrier0Pred::OR:
+ return llvm::Intrinsic::nvvm_barrier0_or;
+ case NVVM::Barrier0Pred::POPC:
+ return llvm::Intrinsic::nvvm_barrier0_popc;
+ }
+ llvm_unreachable("Unknown predicate for barrier0");
+}
+
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
switch (scope) {
case NVVM::MemScopeKind::CTA:
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 1ec55408e97a5..9929882a033de 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -166,13 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 {
llvm.return %1 : f32
}
-// CHECK-LABEL: @llvm_nvvm_barrier0
-llvm.func @llvm_nvvm_barrier0() {
- // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
- nvvm.barrier0
- llvm.return
-}
-
// CHECK-LABEL: @llvm_nvvm_barrier(
// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]])
llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/167036
More information about the Mlir-commits
mailing list