[Mlir-commits] [mlir] [mlir] [amdgpu] Remove s_wait_loadcnt from amdgpu.lds_barrier on gfx12 (PR #152778)

Paul Trojahn llvmlistbot at llvm.org
Fri Aug 8 12:05:50 PDT 2025


https://github.com/ptrojahn created https://github.com/llvm/llvm-project/pull/152778

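Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so lowering amdgpu.lds_barrier to the ROCDL wait/barrier ops causes the backend to insert an extra s_wait_loadcnt before the barrier. To get rid of that wait, we need to lower to inline assembly on gfx12 as well. The wait is introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017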

>From 2dbfd049696c8b204d8f09fc628be0a1121d5dd7 Mon Sep 17 00:00:00 2001
From: Paul Trojahn <paul.trojahn at amd.com>
Date: Fri, 8 Aug 2025 18:56:41 +0200
Subject: [PATCH] Remove s_wait_loadcnt from amdgpu.lds_barrier on gfx12

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we
need to use inline assembly to get rid of the wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017
---
 .../AMDGPUToROCDL/AMDGPUToROCDL.cpp           | 20 ++++++++++---------
 .../AMDGPUToROCDL/amdgpu-to-rocdl.mlir        |  5 ++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
   LogicalResult
   matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
 
     if (requiresInlineAsm) {
       auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
                                                       LLVM::AsmDialect::AD_ATT);
-      const char *asmStr =
+      const char *asmStrPreGfx12 =
           ";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+      const char *asmStr =
+          ";;;WARNING: BREAKS DEBUG WATCHES\n"
+          "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
       const char *constraints = "";
       rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
           op,
           /*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
-          /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+          /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+          constraints, /*has_side_effects=*/true,
           /*is_align_stack=*/false, LLVM::TailCallKind::None,
           /*asm_dialect=*/asmDialectAttr,
           /*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
       Location loc = op->getLoc();
       ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
       rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+      return success();
     } else {
-      Location loc = op->getLoc();
-      ROCDL::WaitDscntOp::create(rewriter, loc, 0);
-      ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
-      rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+      return op.emitOpError(
+                 "don't know how to lower this for chipset major version")
+             << chipset.majorVersion;
     }
-
-    return success();
   }
 };
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
   // GFX10-NEXT: rocdl.s.barrier
   // GFX11:  llvm.inline_asm has_side_effects asm_dialect = att
   // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
-  // GFX12:  rocdl.s.wait.dscnt 0
-  // GFX12-NEXT: rocdl.s.barrier.signal -1
-  // GFX12-NEXT: rocdl.s.barrier.wait -1
+  // GFX12:  llvm.inline_asm has_side_effects asm_dialect = att
+  // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
   amdgpu.lds_barrier
   func.return
 }



More information about the Mlir-commits mailing list