[Mlir-commits] [mlir] New gfx12 barrier instructions and update lowering LDSBarrierOp (PR #109273)
Daniel Hernandez-Juarez
llvmlistbot at llvm.org
Thu Sep 19 04:52:16 PDT 2024
https://github.com/dhernandez0 created https://github.com/llvm/llvm-project/pull/109273
This PR adds ROCDL ops for the new gfx12 barrier instructions (s.barrier.signal, s.barrier.wait, and s.wait.dscnt) and updates the lowering of LDSBarrierOp to use them on gfx12 and later chipsets.
>From a011b7d54d68796d5d74581071dea370f8b8d4bd Mon Sep 17 00:00:00 2001
From: Daniel Hernandez-Juarez <dhernandez0 at gmail.com>
Date: Thu, 19 Sep 2024 11:49:38 +0000
Subject: [PATCH] New gfx12 barrier instructions: s.barrier.signal,
s.barrier.wait and s.wait.dscnt. And update lowering LDSBarrierOp
accordingly.
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 25 +++++++++-
.../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 48 +++++++++++--------
.../AMDGPUToROCDL/amdgpu-to-rocdl.mlir | 4 ++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 22 +++++++++
mlir/test/Target/LLVMIR/rocdl.mlir | 21 ++++++++
5 files changed, 98 insertions(+), 22 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index de232462556502..203e2897a4fac5 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -88,11 +88,12 @@ class ROCDL_IntrPure1Op<string mnemonic> :
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
- int requiresAccessGroup = 0, int requiresAliasAnalysis = 0> :
+ int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
+ list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
overloadedOperands, traits, numResults, requiresAccessGroup,
- requiresAliasAnalysis>;
+ requiresAliasAnalysis, 0, immArgPositions, immArgAttrNames>;
//===----------------------------------------------------------------------===//
// ROCDL special register op definitions
@@ -255,6 +256,26 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
let assemblyFormat = "attr-dict";
}
+def ROCDL_BarrierSignalOp : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>,
+ Arguments<(ins I32Attr:$id)> {
+ let results = (outs);
+ let assemblyFormat = "$id attr-dict";
+}
+
+def ROCDL_BarrierWaitOp : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0>,
+ Arguments<(ins I16Attr:$id)> {
+ let results = (outs);
+ let assemblyFormat = "$id attr-dict";
+ string llvmBuilder =
+ "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));";
+}
+
+def ROCDL_WaitDscntOp: ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>,
+ Arguments<(ins I16Attr:$id)> {
+ let results = (outs);
+ let assemblyFormat = "$id attr-dict";
+}
+
def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>,
Arguments<(ins I16Attr:$priority)> {
let results = (outs);
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index f80d2793eaef59..7112d1607dfdca 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -301,27 +301,35 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
/*operand_attrs=*/ArrayAttr());
return success();
}
- constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
- constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
- // Left in place in case someone disables the inline ASM path or future
- // chipsets use the same bit pattern.
- constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);
-
- int32_t ldsOnlyBits;
- if (chipset.majorVersion == 11)
- ldsOnlyBits = ldsOnlyBitsGfx11;
- else if (chipset.majorVersion == 10)
- ldsOnlyBits = ldsOnlyBitsGfx10;
- else if (chipset.majorVersion <= 9)
- ldsOnlyBits = ldsOnlyBitsGfx6789;
- else
- return op.emitOpError(
- "don't know how to lower this for chipset major version")
- << chipset.majorVersion;
+ if (chipset.majorVersion < 12) {
+ constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
+ constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
+ // Left in place in case someone disables the inline ASM path or future
+ // chipsets use the same bit pattern.
+ constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);
+
+ int32_t ldsOnlyBits;
+ if (chipset.majorVersion == 11)
+ ldsOnlyBits = ldsOnlyBitsGfx11;
+ else if (chipset.majorVersion == 10)
+ ldsOnlyBits = ldsOnlyBitsGfx10;
+ else if (chipset.majorVersion <= 9)
+ ldsOnlyBits = ldsOnlyBitsGfx6789;
+ else
+ return op.emitOpError(
+ "don't know how to lower this for chipset major version")
+ << chipset.majorVersion;
+
+ Location loc = op->getLoc();
+ rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
+ rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+ } else {
+ Location loc = op->getLoc();
+ rewriter.create<ROCDL::WaitDscntOp>(loc, 0);
+ rewriter.create<ROCDL::BarrierSignalOp>(loc, -1);
+ rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+ }
- Location loc = op->getLoc();
- rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
- rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
return success();
}
};
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 9f4db151043455..7fd5610a88913e 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -2,6 +2,7 @@
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx90a | FileCheck %s --check-prefixes=CHECK,GFX9,GFX90A
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32
func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
@@ -246,6 +247,9 @@ func.func @lds_barrier() {
// GFX10-NEXT: rocdl.s.barrier
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
+ // GFX12: rocdl.s.wait.dscnt 0
+ // GFX12-NEXT: rocdl.s.barrier.signal -1
+ // GFX12-NEXT: rocdl.s.barrier.wait -1
amdgpu.lds_barrier
func.return
}
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index f5dd5721c45e6f..397d66d92bc5d5 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -352,6 +352,28 @@ llvm.func @rocdl.s.barrier() {
rocdl.s.barrier
llvm.return
}
+
+llvm.func @rocdl.s.barrier.signal() {
+ // CHECK-LABEL: rocdl.s.barrier.signal
+ // CHECK: rocdl.s.barrier.signal -1
+ rocdl.s.barrier.signal -1
+ llvm.return
+}
+
+llvm.func @rocdl.s.barrier.wait() {
+ // CHECK-LABEL: rocdl.s.barrier.wait
+ // CHECK: rocdl.s.barrier.wait -1
+ rocdl.s.barrier.wait -1
+ llvm.return
+}
+
+llvm.func @rocdl.s.wait.dscnt() {
+ // CHECK-LABEL: rocdl.s.wait.dscnt
+ // CHECK: rocdl.s.wait.dscnt 0
+ rocdl.s.wait.dscnt 0
+ llvm.return
+}
+
// -----
// expected-error @below {{attribute attached to unexpected op}}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 0f0c2412e5ec2a..08c2d4e6477970 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -142,6 +142,27 @@ llvm.func @rocdl.barrier() {
llvm.return
}
+llvm.func @rocdl.s.barrier.signal() {
+ // CHECK-LABEL: rocdl.s.barrier.signal
+ // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
+ rocdl.s.barrier.signal -1
+ llvm.return
+}
+
+llvm.func @rocdl.s.barrier.wait() {
+ // CHECK-LABEL: rocdl.s.barrier.wait
+ // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
+ rocdl.s.barrier.wait -1
+ llvm.return
+}
+
+llvm.func @rocdl.s.wait.dscnt() {
+ // CHECK-LABEL: rocdl.s.wait.dscnt
+ // CHECK-NEXT: call void @llvm.amdgcn.s.wait.dscnt(i16 0)
+ rocdl.s.wait.dscnt 0
+ llvm.return
+}
+
llvm.func @rocdl.setprio() {
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
rocdl.s.setprio 0
More information about the Mlir-commits
mailing list