[Mlir-commits] [mlir] [mlir][GPU] Extend gpu.barrier with scope and named-barrier support (PR #195692)

Fabian Mora llvmlistbot at llvm.org
Wed May 6 05:51:23 PDT 2026


================
@@ -528,71 +574,145 @@ struct GPUBarrierOpLowering final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
   matchAndRewrite(gpu::BarrierOp op, gpu::BarrierOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
-
-    // Analyze the address_spaces attribute to determine fence behavior.
-    bool fenceGlobal = false;
-    bool fenceLDS = false;
-    std::optional<ArrayAttr> addrSpacesToFence = op.getAddressSpaces();
-
-    if (addrSpacesToFence) {
-      for (auto spaceAttr :
-           addrSpacesToFence->getAsRange<gpu::AddressSpaceAttr>()) {
-        switch (spaceAttr.getValue()) {
-        case gpu::AddressSpace::Global:
-          fenceGlobal = true;
-          break;
-        case gpu::AddressSpace::Workgroup:
-          fenceLDS = true;
-          break;
-        case gpu::AddressSpace::Private:
-        case gpu::AddressSpace::Constant:
-          // Private is thread-local, constant is read-only; no fencing needed.
-          break;
-        }
-      }
-    } else {
-      // Default semantics match __syncthreads() and fence both global and LDS.
-      fenceGlobal = true;
-      fenceLDS = true;
+    gpu::Scope scope = op.getScope();
+
+    // Subgroup (wave) scope.
+    if (scope == gpu::Scope::Subgroup) {
+      emitFences(op.getAddressSpaces(), rewriter, loc, "wavefront",
+                 /*before=*/true);
+      ROCDL::WaveBarrierOp::create(rewriter, loc);
+      emitFences(op.getAddressSpaces(), rewriter, loc, "wavefront",
+                 /*before=*/false);
+      rewriter.eraseOp(op);
+      return success();
     }
 
-    Attribute mmra;
-    if (fenceLDS && !fenceGlobal) {
-      mmra =
-          rewriter.getAttr<LLVM::MMRATagAttr>("amdgpu-synchronize-as", "local");
-    } else if (fenceGlobal && !fenceLDS) {
-      mmra = rewriter.getAttr<LLVM::MMRATagAttr>("amdgpu-synchronize-as",
-                                                 "global");
+    // Device and CrossDevice scopes are not directly representable.
+    if (scope == gpu::Scope::Device || scope == gpu::Scope::CrossDevice)
+      return op.emitOpError("device/cross_device scope barriers are not "
+                            "supported on AMDGPU");
+
+    // Cluster scope: gfx1250+ only, signal/wait with constant -3.
+    if (scope == gpu::Scope::Cluster) {
+      if (chipset < amdgpu::Chipset(12, 5, 0))
+        return op.emitOpError("cluster scope barriers require gfx1250+");
+      emitFences(op.getAddressSpaces(), rewriter, loc, "cluster",
+                 /*before=*/true);
+      ROCDL::BarrierSignalOp::create(rewriter, loc, -3);
+      ROCDL::BarrierWaitOp::create(rewriter, loc, static_cast<int16_t>(-3));
+      emitFences(op.getAddressSpaces(), rewriter, loc, "cluster",
+                 /*before=*/false);
+      rewriter.eraseOp(op);
+      return success();
     }
 
-    constexpr llvm::StringLiteral scope = "workgroup";
-
-    bool emitFences = fenceGlobal || fenceLDS;
-    // Emit release fence if needed.
-    if (emitFences) {
-      auto relFence = LLVM::FenceOp::create(
-          rewriter, loc, LLVM::AtomicOrdering::release, scope);
-      if (mmra)
-        relFence->setDiscardableAttr(LLVM::LLVMDialect::getMmraAttrName(),
-                                     mmra);
+    // Workgroup scope (default).
+    assert(scope == gpu::Scope::Workgroup);
+
+    // Named barrier path.
+    if (Value namedBarrier = adaptor.getNamedBarrier()) {
+      if (chipset.majorVersion < 12)
+        return op.emitOpError("named barriers require gfx12+");
+
+      emitFences(op.getAddressSpaces(), rewriter, loc, "workgroup",
+                 /*before=*/true);
+      // A wave must join the named barrier before it may signal it.
+      ROCDL::BarrierJoinOp::create(rewriter, loc, namedBarrier);
+      // Signal with memberCnt=0 retains the count from s.barrier.init.
+      ROCDL::BarrierSignalVarOp::create(rewriter, loc, namedBarrier,
+                                        /*memberCnt=*/0);
+      // id=1 selects the named-barrier wait class; the actual barrier waited
+      // on is the last one this wave joined.
+      ROCDL::BarrierWaitOp::create(rewriter, loc, static_cast<int16_t>(1));
----------------
fabianmcg wrote:

Same here for the magic value `1` passed to `ROCDL::BarrierWaitOp::create`.

https://github.com/llvm/llvm-project/pull/195692


More information about the Mlir-commits mailing list