[Mlir-commits] [flang] [mlir] [MLIR][NVVM] Remove the `nvvm.barrier0` op (PR #195608)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon May 4 01:06:41 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
@llvm/pr-subscribers-flang-fir-hlfir
Author: Guray Ozen (grypp)
<details>
<summary>Changes</summary>
This PR removes `nvvm.barrier0` because it duplicates `nvvm.barrier`, which covers the same functionality and more (with no operands, `nvvm.barrier` defaults to barrier ID 0).
`nvvm.barrier0` exists only for historical reasons, and keeping both ops around is confusing to maintain.
---
Full diff: https://github.com/llvm/llvm-project/pull/195608.diff
10 Files Affected:
- (modified) flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp (+1-1)
- (modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+2-2)
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (-18)
- (modified) mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt (-5)
- (removed) mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td (-22)
- (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+11-5)
- (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+3-3)
- (modified) mlir/test/Dialect/LLVMIR/nvvm.mlir (-7)
- (modified) mlir/test/Target/LLVMIR/nvvmir-invalid.mlir (+4-4)
- (modified) mlir/test/python/dialects/nvvm.py (+2-2)
``````````diff
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 6d8c2fcf42bdd..b53294b68ac92 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1328,7 +1328,7 @@ CUDAIntrinsicLibrary::genMatchAnySync(mlir::Type resultType,
// SYNCTHREADS
void CUDAIntrinsicLibrary::genSyncThreads(
llvm::ArrayRef<fir::ExtendedValue> args) {
- mlir::NVVM::Barrier0Op::create(builder, loc);
+ mlir::NVVM::BarrierOp::create(builder, loc);
}
// SYNCTHREADS_AND
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 61662e4822177..a4498b27c7646 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -101,7 +101,7 @@ attributes(global) subroutine devsub()
end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
-! CHECK: nvvm.barrier0
+! CHECK: nvvm.barrier
! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<and> %c1{{.*}} -> i32
! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
@@ -212,7 +212,7 @@ end
! CHECK-LABEL: func.func @_QPhost1()
! CHECK: cuf.kernel
-! CHECK: nvvm.barrier0
+! CHECK: nvvm.barrier
! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: nvvm.barrier #nvvm.reduction<and> %c1{{.*}} -> i32
! CHECK: nvvm.barrier #nvvm.reduction<popc> %c1{{.*}} -> i32
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 73afdb29b6149..c93f3437544a2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1166,24 +1166,6 @@ def NVVM_MBarrierTryWaitOp : NVVM_SingleResultIntrinsicOp<"mbarrier.try_wait"> {
// NVVM synchronization op definitions
//===----------------------------------------------------------------------===//
-def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
- let summary = "CTA Barrier Synchronization Op (Barrier ID 0)";
- let description = [{
- The `nvvm.barrier0` operation is a convenience operation that performs barrier
- synchronization and communication within a CTA (Cooperative Thread Array) using
- barrier ID 0. It is functionally equivalent to `nvvm.barrier` or `nvvm.barrier id=0`.
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
- }];
-
- let assemblyFormat = "attr-dict";
- string llvmBuilder = [{
- createIntrinsicCall(
- builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all,
- {builder.getInt32(0)});
- }];
-}
-
// Attrs describing the reduction operations for the barrier operation.
def BarrierReductionPopc : I32EnumAttrCase<"POPC", 0, "popc">;
def BarrierReductionAnd : I32EnumAttrCase<"AND", 1, "and">;
diff --git a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
index 681d788aa54dd..06137bdb41d71 100644
--- a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
@@ -1,14 +1,9 @@
-set(LLVM_TARGET_DEFINITIONS GPUToNVVM.td)
-mlir_tablegen(GPUToNVVM.cpp.inc -gen-rewriters)
-add_public_tablegen_target(MLIRGPUToNVVMIncGen)
-
add_mlir_conversion_library(MLIRGPUToNVVMTransforms
LowerGpuOpsToNVVMOps.cpp
WmmaOpsToNvvm.cpp
DEPENDS
MLIRConversionPassIncGen
- MLIRGPUToNVVMIncGen
LINK_LIBS PUBLIC
MLIRArithToLLVM
diff --git a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
deleted file mode 100644
index 0fcda38631a9b..0000000000000
--- a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
+++ /dev/null
@@ -1,22 +0,0 @@
-//==-- GPUToNVVM.td - GPU Ops to NVVM Patterns ---------------*- tablegen -*==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines Patterns to lower GPU ops to NVVM.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_CONVERSION_GPUTONVVM_TD
-#define MLIR_CONVERSION_GPUTONVVM_TD
-
-include "mlir/IR/PatternBase.td"
-include "mlir/Dialect/GPU/IR/GPUOps.td"
-include "mlir/Dialect/LLVMIR/NVVMOps.td"
-
-def : Pat<(GPU_BarrierOp : $op $memory_fence), (NVVM_Barrier0Op)>;
-
-#endif // MLIR_CONVERSION_GPUTONVVM_TD
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 44a4cafcc224b..7a10f7f79d596 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -367,8 +367,16 @@ struct AssertOpToAssertfailLowering
}
};
-/// Import the GPU Ops to NVVM Patterns.
-#include "GPUToNVVM.cpp.inc"
+/// Lowering of gpu.barrier to nvvm.barrier (defaults to barrier id 0).
+struct GPUBarrierToNVVMLowering : public OpRewritePattern<gpu::BarrierOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(gpu::BarrierOp op,
+ PatternRewriter &rewriter) const override {
+ rewriter.replaceOpWithNewOp<NVVM::BarrierOp>(op);
+ return success();
+ }
+};
/// A pass that replaces all occurrences of GPU device operations with their
/// corresponding NVVM equivalent.
@@ -503,9 +511,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
using gpu::index_lowering::IndexKind;
using gpu::index_lowering::IntrType;
- // TODO: Pass benefit to generated patterns.
- populateWithGenerated(patterns);
-
+ patterns.add<GPUBarrierToNVVMLowering>(patterns.getContext(), benefit);
patterns.add<GPUPrintfOpToVPrintfLowering, AssertOpToAssertfailLowering>(
converter, benefit);
patterns.add<
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index e717c515bd1d6..d5aad8321cb9f 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -85,7 +85,7 @@ gpu.module @test_module_2 {
%arg0 = arith.constant 1.0 : f32
// TODO: Check full IR expansion once lowering has settled.
// CHECK: nvvm.shfl.sync bfly {{.*}}
- // CHECK: nvvm.barrier0
+ // CHECK: nvvm.barrier
// CHECK: llvm.fadd
%result = gpu.all_reduce add %arg0 uniform {} : (f32) -> (f32)
@@ -101,7 +101,7 @@ gpu.module @test_module_3 {
%arg0 = arith.constant 1 : i32
// TODO: Check full IR expansion once lowering has settled.
// CHECK: nvvm.shfl.sync bfly {{.*}}
- // CHECK: nvvm.barrier0
+ // CHECK: nvvm.barrier
%result = gpu.all_reduce %arg0 uniform {
^bb(%lhs : i32, %rhs : i32):
%xor = arith.xori %lhs, %rhs : i32
@@ -185,7 +185,7 @@ gpu.module @test_module_4 {
gpu.module @test_module_5 {
// CHECK-LABEL: func @gpu_sync()
func.func @gpu_sync() {
- // CHECK: nvvm.barrier0
+ // CHECK: nvvm.barrier
gpu.barrier
func.return
}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index bbb3cd4b38a41..e3a98cc9cfc34 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -36,13 +36,6 @@ func.func @nvvm_rcp(%arg0: f32) -> f32 {
llvm.return %0 : f32
}
-// CHECK-LABEL: @llvm_nvvm_barrier0
-func.func @llvm_nvvm_barrier0() {
- // CHECK: nvvm.barrier0
- nvvm.barrier0
- llvm.return
-}
-
// CHECK-LABEL: @llvm_nvvm_barrier
// CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32)
llvm.func @llvm_nvvm_barrier(%barId : i32, %numberOfThreads : i32) {
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index a36984590b89b..6e96e918d5f0d 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -618,9 +618,9 @@ llvm.func @nvvm_wmma_load_a_f64(%arg0: !llvm.ptr, %arg1 : i32) {
// -----
-// Test that nvvm.barrier0 at module scope (outside any function) produces a
+// Test that nvvm.barrier at module scope (outside any function) produces a
// proper error instead of crashing with a null dereference in
// createIntrinsicCall. See https://github.com/llvm/llvm-project/issues/186642
-// expected-error @+2 {{'nvvm.barrier0' op cannot be translated to LLVM IR without an active insertion point}}
-// expected-error @+1 {{LLVM Translation failed for operation: nvvm.barrier0}}
-nvvm.barrier0
+// expected-error @+2 {{'nvvm.barrier' op cannot be translated to LLVM IR without an active insertion point}}
+// expected-error @+1 {{LLVM Translation failed for operation: nvvm.barrier}}
+nvvm.barrier
diff --git a/mlir/test/python/dialects/nvvm.py b/mlir/test/python/dialects/nvvm.py
index f5e057812642e..d727a39e956e0 100644
--- a/mlir/test/python/dialects/nvvm.py
+++ b/mlir/test/python/dialects/nvvm.py
@@ -148,7 +148,7 @@ def barriers(mask, vi32, vf32):
reduction_predicate=pred,
)
- nvvm.barrier0()
+ nvvm.barrier()
nvvm.bar_warp_sync(mask)
nvvm.cluster_arrive()
nvvm.cluster_arrive(aligned=True)
@@ -170,7 +170,7 @@ def barriers(mask, vi32, vf32):
# CHECK: %[[BARRIER_1:.*]] = nvvm.barrier #nvvm.reduction<and> %[[PRED]] -> i32
# CHECK: %[[BARRIER_2:.*]] = nvvm.barrier #nvvm.reduction<or> %[[BARRIER_1]] -> i32
# CHECK: %[[BARRIER_3:.*]] = nvvm.barrier #nvvm.reduction<popc> %[[BARRIER_2]] -> i32
-# CHECK: nvvm.barrier0
+# CHECK: nvvm.barrier
# CHECK: nvvm.bar.warp.sync %[[ARG0]] : i32
# CHECK: nvvm.cluster.arrive
# CHECK: nvvm.cluster.arrive {aligned}
``````````
</details>
https://github.com/llvm/llvm-project/pull/195608
More information about the Mlir-commits
mailing list