[Mlir-commits] [mlir] [mlir] [amdgpu] Remove s_wait_loadcnt from amdgpu.lds_barrier on gfx12 (PR #152778)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Aug 8 12:06:22 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Paul Trojahn (ptrojahn)

<details>
<summary>Changes</summary>

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so `amdgpu.lds_barrier` needs to be lowered to inline assembly on gfx12 as well in order to get rid of the extra `s_wait_loadcnt` wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017

---
Full diff: https://github.com/llvm/llvm-project/pull/152778.diff


2 Files Affected:

- (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+11-9) 
- (modified) mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir (+2-3) 


``````````diff
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
   LogicalResult
   matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
 
     if (requiresInlineAsm) {
       auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
                                                       LLVM::AsmDialect::AD_ATT);
-      const char *asmStr =
+      const char *asmStrPreGfx12 =
           ";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+      const char *asmStr =
+          ";;;WARNING: BREAKS DEBUG WATCHES\n"
+          "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
       const char *constraints = "";
       rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
           op,
           /*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
-          /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+          /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+          constraints, /*has_side_effects=*/true,
           /*is_align_stack=*/false, LLVM::TailCallKind::None,
           /*asm_dialect=*/asmDialectAttr,
           /*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
       Location loc = op->getLoc();
       ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
       rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+      return success();
     } else {
-      Location loc = op->getLoc();
-      ROCDL::WaitDscntOp::create(rewriter, loc, 0);
-      ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
-      rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+      return op.emitOpError(
+                 "don't know how to lower this for chipset major version")
+             << chipset.majorVersion;
     }
-
-    return success();
   }
 };
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
   // GFX10-NEXT: rocdl.s.barrier
   // GFX11:  llvm.inline_asm has_side_effects asm_dialect = att
   // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
-  // GFX12:  rocdl.s.wait.dscnt 0
-  // GFX12-NEXT: rocdl.s.barrier.signal -1
-  // GFX12-NEXT: rocdl.s.barrier.wait -1
+  // GFX12:  llvm.inline_asm has_side_effects asm_dialect = att
+  // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
   amdgpu.lds_barrier
   func.return
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/152778


More information about the Mlir-commits mailing list