[Mlir-commits] [mlir] [mlir][GPU] Extend gpu.barrier with scope and named-barrier support (PR #195692)
Fabian Mora
llvmlistbot at llvm.org
Wed May 6 05:51:23 PDT 2026
================
@@ -528,71 +574,145 @@ struct GPUBarrierOpLowering final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
matchAndRewrite(gpu::BarrierOp op, gpu::BarrierOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = op.getLoc();
-
- // Analyze the address_spaces attribute to determine fence behavior.
- bool fenceGlobal = false;
- bool fenceLDS = false;
- std::optional<ArrayAttr> addrSpacesToFence = op.getAddressSpaces();
-
- if (addrSpacesToFence) {
- for (auto spaceAttr :
- addrSpacesToFence->getAsRange<gpu::AddressSpaceAttr>()) {
- switch (spaceAttr.getValue()) {
- case gpu::AddressSpace::Global:
- fenceGlobal = true;
- break;
- case gpu::AddressSpace::Workgroup:
- fenceLDS = true;
- break;
- case gpu::AddressSpace::Private:
- case gpu::AddressSpace::Constant:
- // Private is thread-local, constant is read-only; no fencing needed.
- break;
- }
- }
- } else {
- // Default semantics match __syncthreads() and fence both global and LDS.
- fenceGlobal = true;
- fenceLDS = true;
+ gpu::Scope scope = op.getScope();
+
+ // Subgroup (wave) scope.
+ if (scope == gpu::Scope::Subgroup) {
+ emitFences(op.getAddressSpaces(), rewriter, loc, "wavefront",
+ /*before=*/true);
+ ROCDL::WaveBarrierOp::create(rewriter, loc);
+ emitFences(op.getAddressSpaces(), rewriter, loc, "wavefront",
+ /*before=*/false);
+ rewriter.eraseOp(op);
+ return success();
}
- Attribute mmra;
- if (fenceLDS && !fenceGlobal) {
- mmra =
- rewriter.getAttr<LLVM::MMRATagAttr>("amdgpu-synchronize-as", "local");
- } else if (fenceGlobal && !fenceLDS) {
- mmra = rewriter.getAttr<LLVM::MMRATagAttr>("amdgpu-synchronize-as",
- "global");
+ // Device and CrossDevice scopes are not directly representable.
+ if (scope == gpu::Scope::Device || scope == gpu::Scope::CrossDevice)
+ return op.emitOpError("device/cross_device scope barriers are not "
+ "supported on AMDGPU");
+
+ // Cluster scope: gfx1250+ only, signal/wait with constant -3.
+ if (scope == gpu::Scope::Cluster) {
+ if (chipset < amdgpu::Chipset(12, 5, 0))
+ return op.emitOpError("cluster scope barriers require gfx1250+");
+ emitFences(op.getAddressSpaces(), rewriter, loc, "cluster",
+ /*before=*/true);
+ ROCDL::BarrierSignalOp::create(rewriter, loc, -3);
+ ROCDL::BarrierWaitOp::create(rewriter, loc, static_cast<int16_t>(-3));
+ emitFences(op.getAddressSpaces(), rewriter, loc, "cluster",
+ /*before=*/false);
+ rewriter.eraseOp(op);
+ return success();
}
- constexpr llvm::StringLiteral scope = "workgroup";
-
- bool emitFences = fenceGlobal || fenceLDS;
- // Emit release fence if needed.
- if (emitFences) {
- auto relFence = LLVM::FenceOp::create(
- rewriter, loc, LLVM::AtomicOrdering::release, scope);
- if (mmra)
- relFence->setDiscardableAttr(LLVM::LLVMDialect::getMmraAttrName(),
- mmra);
+ // Workgroup scope (default).
+ assert(scope == gpu::Scope::Workgroup);
+
+ // Named barrier path.
+ if (Value namedBarrier = adaptor.getNamedBarrier()) {
+ if (chipset.majorVersion < 12)
+ return op.emitOpError("named barriers require gfx12+");
+
+ emitFences(op.getAddressSpaces(), rewriter, loc, "workgroup",
+ /*before=*/true);
+ // A wave must join the named barrier before it may signal it.
+ ROCDL::BarrierJoinOp::create(rewriter, loc, namedBarrier);
+ // Signal with memberCnt=0 retains the count from s.barrier.init.
+ ROCDL::BarrierSignalVarOp::create(rewriter, loc, namedBarrier,
+ /*memberCnt=*/0);
+ // id=1 selects the named-barrier wait class; the actual barrier waited
+ // on is the last one this wave joined.
+ ROCDL::BarrierWaitOp::create(rewriter, loc, static_cast<int16_t>(1));
----------------
fabianmcg wrote:
The same comment applies here for the magic value `1`.
https://github.com/llvm/llvm-project/pull/195692
More information about the Mlir-commits mailing list