[Mlir-commits] [mlir] [mlir][AMDGPU] Add a scheduling barrier guard around inlineAsm lds.barrier (PR #109678)
Daniel Hernandez-Juarez
llvmlistbot at llvm.org
Mon Sep 23 08:48:04 PDT 2024
https://github.com/dhernandez0 updated https://github.com/llvm/llvm-project/pull/109678
>From 77c1f5016305c3548a2e9dd8778eac07edd782df Mon Sep 17 00:00:00 2001
From: Daniel Hernandez-Juarez <dhernandez0 at gmail.com>
Date: Mon, 23 Sep 2024 15:36:38 +0000
Subject: [PATCH] Add a scheduling barrier guard around inlineAsm lds.barrier
This commit adds a scheduling region around the inlineAsm
to guard against possible complications arising from it
interfering with the backend scheduler / register allocation.
---
mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 12 ++++++++++--
.../Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir | 4 ++++
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 7112d1607dfdca..8daf0faebe1aa6 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -290,15 +290,23 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
if (requiresInlineAsm) {
auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
LLVM::AsmDialect::AD_ATT);
+ Location loc = op->getLoc();
+ // Ensure the inlineAsm is guarded with a scheduling region
+ // So it will not interfere with backend compilation more than
+ // it needs.
+ rewriter.create<amdgpu::SchedBarrierOp>(
+ loc, amdgpu::sched_barrier_opt_enum::none);
const char *asmStr =
";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
const char *constraints = "";
- rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
- op,
+ rewriter.create<LLVM::InlineAsmOp>(
+ loc,
/*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
/*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
/*is_align_stack=*/false, /*asm_dialect=*/asmDialectAttr,
/*operand_attrs=*/ArrayAttr());
+ rewriter.replaceOpWithNewOp<amdgpu::SchedBarrierOp>(
+ op, amdgpu::sched_barrier_opt_enum::none);
return success();
}
if (chipset.majorVersion < 12) {
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 7fd5610a88913e..9a17cc6a929c02 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -239,14 +239,18 @@ func.func @amdgpu_raw_buffer_atomic_cmpswap_v2f16(%src : vector<2xf16>, %cmp : v
// CHECK-LABEL: func @lds_barrier
func.func @lds_barrier() {
+ // GFX908: rocdl.sched.barrier 0
// GFX908: llvm.inline_asm has_side_effects asm_dialect = att
// GFX908-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
+ // GFX908: rocdl.sched.barrier 0
// GFX90A: rocdl.waitcnt -7937
// GFX90A-NEXT: rocdl.s.barrier
// GFX10: rocdl.waitcnt -16129
// GFX10-NEXT: rocdl.s.barrier
+ // GFX11: rocdl.sched.barrier 0
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
+ // GFX11: rocdl.sched.barrier 0
// GFX12: rocdl.s.wait.dscnt 0
// GFX12-NEXT: rocdl.s.barrier.signal -1
// GFX12-NEXT: rocdl.s.barrier.wait -1
More information about the Mlir-commits
mailing list