[Mlir-commits] [mlir] GPU known subgroup size (PR #112732)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Oct 17 08:36:45 PDT 2024
https://github.com/FMarno created https://github.com/llvm/llvm-project/pull/112732
None
>From 4c61ea3ccfeebc2673335370ae4a3c4b74207f0e Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 1 Oct 2024 17:55:05 +0100
Subject: [PATCH 1/4] WIP add known subgroup size
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 9 ++++++++-
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 17 ++++++++++-------
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 2 +-
3 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6098eb34d04d52..d4779d1b47a42d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -388,6 +388,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
by using `gpu.known_block_size` or `gpu.known_grid_size`, but this carries
the risk that they will de discarded.
+ A function may optionally be annotated with the subgroup size that will be
+ used when it is launched using the `known_subgroup_size` attribute. If set,
+ this attribute is a single positive integer (i.e. > 0). Launching a function
+ with this annotation using a subgroup size other than the one specified is
+ undefined behaviour.
+
Syntax:
```
@@ -431,7 +437,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
GPU_OptionalDimSizeHintAttr:$known_block_size,
- GPU_OptionalDimSizeHintAttr:$known_grid_size);
+ GPU_OptionalDimSizeHintAttr:$known_grid_size,
+ OptionalAttr<I32Attr>:$known_subgroup_size);
let regions = (region AnyRegion:$body);
let skipDefaultBuilders = 1;
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a79..43b72023a2815f 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -272,23 +272,26 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}
/// Get the subgroup size from the target or return a default.
- static int getSubgroupSize(Operation *op) {
- return spirv::lookupTargetEnvOrDefault(op)
- .getResourceLimits()
- .getSubgroupSize();
+ static std::optional<uint32_t> getSubgroupSize(Operation *op) {
+ // TODO check for intel_reqd_sub_group_size
+ return op->getParentOfType<gpu::GPUFuncOp>().getKnownSubgroupSize();
}
- static bool hasValidWidth(gpu::ShuffleOp op) {
+ static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
llvm::APInt val;
Value width = op.getWidth();
return matchPattern(width, m_ConstantInt(&val)) &&
- val == getSubgroupSize(op);
+ val == subgroupSize;
}
LogicalResult
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
- if (!hasValidWidth(op))
+ auto maybeSubgroupSize = getSubgroupSize(op);
+ if (!maybeSubgroupSize)
+ return rewriter.notifyMatchFailure(
+ op, "subgroup size not specified. Should be specified with known_subgroup_size.");
+ if (!hasValidWidth(op, maybeSubgroupSize.value()))
return rewriter.notifyMatchFailure(
op, "shuffle width and subgroup size mismatch");
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..372ff3d51ca64c 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -303,7 +303,7 @@ gpu.module @shuffles {
// Check `gpu.shuffle` conversion with explicit subgroup size.
gpu.module @shuffles attributes {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
+ gpu.known_subgroup_size = 16 : i32
} {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
>From 469376a77fbd8b8cd088e7da1cd2eb396af85d90 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Mon, 14 Oct 2024 15:31:24 +0100
Subject: [PATCH 2/4] allow gpu.known_subgroup_size on any op
---
.../lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 16 ++++++++++++----
.../Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 10 ++++------
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 43b72023a2815f..8f5751ed0c6981 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -274,14 +274,21 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
/// Get the subgroup size from the target or return a default.
static std::optional<uint32_t> getSubgroupSize(Operation *op) {
// TODO check for intel_reqd_sub_group_size
- return op->getParentOfType<gpu::GPUFuncOp>().getKnownSubgroupSize();
+
+ FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
+ if (!func)
+ return {};
+
+ Attribute knownSubgroupSizeAttr = func->getAttr("gpu.known_subgroup_size");
+ if (!knownSubgroupSizeAttr)
+ return {};
+ return cast<IntegerAttr>(knownSubgroupSizeAttr).getInt();
}
static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
llvm::APInt val;
Value width = op.getWidth();
- return matchPattern(width, m_ConstantInt(&val)) &&
- val == subgroupSize;
+ return matchPattern(width, m_ConstantInt(&val)) && val == subgroupSize;
}
LogicalResult
@@ -290,7 +297,8 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
auto maybeSubgroupSize = getSubgroupSize(op);
if (!maybeSubgroupSize)
return rewriter.notifyMatchFailure(
- op, "subgroup size not specified. Should be specified with known_subgroup_size.");
+ op, "subgroup size not specified. Should be specified with "
+ "known_subgroup_size.");
if (!hasValidWidth(op, maybeSubgroupSize.value()))
return rewriter.notifyMatchFailure(
op, "shuffle width and subgroup size mismatch");
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 372ff3d51ca64c..31a871e52023a8 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -260,7 +260,7 @@ gpu.module @shuffles {
func.func @gpu_shuffles(%val0: i32, %id: i32,
%val1: i64, %mask: i32,
%val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) {
+ %val3: f64, %delta_down: i32) attributes {gpu.known_subgroup_size = 32 : i32} {
%width = arith.constant 32 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
// CHECK-SAME-DAG: no_unwind
@@ -302,9 +302,7 @@ gpu.module @shuffles {
// Check `gpu.shuffle` conversion with explicit subgroup size.
-gpu.module @shuffles attributes {
- gpu.known_subgroup_size = 16 : i32
-} {
+gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
@@ -352,7 +350,7 @@ gpu.module @shuffles attributes {
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
- // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
func.func @gpu_shuffles(%i8_val: i8,
%i16_val: i16,
%i32_val: i32,
@@ -360,7 +358,7 @@ gpu.module @shuffles attributes {
%f16_val: f16,
%f32_val: f32,
%f64_val: f64,
- %offset: i32) {
+ %offset: i32) attributes {gpu.known_subgroup_size = 16 : i32} {
%width = arith.constant 16 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
>From 6bd9b88c8d03c0eb224ed821ad3426cacb26ce6d Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 15 Oct 2024 16:50:39 +0100
Subject: [PATCH 3/4] make known subgroup size an inherent and discardable
attribute
---
mlir/include/mlir/Dialect/GPU/IR/GPUBase.td | 3 ++-
mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 6 ++++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 860f8933672038..fb9df5067a31b0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -64,7 +64,8 @@ def GPU_Dialect : Dialect {
let discardableAttrs = (ins
"::mlir::DenseI32ArrayAttr":$known_block_size,
- "::mlir::DenseI32ArrayAttr":$known_grid_size
+ "::mlir::DenseI32ArrayAttr":$known_grid_size,
+ "::mlir::IntegerAttr" : $known_subgroup_size
);
let dependentDialects = ["arith::ArithDialect"];
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 8f5751ed0c6981..3eccb6777ea4f4 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -279,10 +279,12 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
if (!func)
return {};
- Attribute knownSubgroupSizeAttr = func->getAttr("gpu.known_subgroup_size");
+ IntegerAttr knownSubgroupSizeAttr =
+ mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(op->getContext())
+ .getAttr(func);
if (!knownSubgroupSizeAttr)
return {};
- return cast<IntegerAttr>(knownSubgroupSizeAttr).getInt();
+ return knownSubgroupSizeAttr.getInt();
}
static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
>From 39938127e57d64f1caf0b75285fcda8f8d821d1e Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Thu, 17 Oct 2024 11:34:16 +0100
Subject: [PATCH 4/4] hacky support for intel_reqd_sub_group_size
---
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 32 +++++++++++++++----
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 2 +-
2 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 3eccb6777ea4f4..2a5e5de2357641 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -271,22 +271,40 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
typeMangling.value());
}
- /// Get the subgroup size from the target or return a default.
- static std::optional<uint32_t> getSubgroupSize(Operation *op) {
- // TODO check for intel_reqd_sub_group_size
-
- FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
- if (!func)
+ static std::optional<uint32_t>
+ getIntelReqdSubGroupSize(FunctionOpInterface func) {
+ constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize =
+ "llvm.intel_reqd_sub_group_size";
+ IntegerAttr reqdSubgroupSizeAttr = llvm::cast_if_present<IntegerAttr>(
+ func->getAttr(discardableIntelReqdSubgroupSize));
+ if (!reqdSubgroupSizeAttr)
return {};
+ return reqdSubgroupSizeAttr.getInt();
+ }
+
+ /// Get the subgroup size from the `gpu.known_subgroup_size` attribute, if set.
+ static std::optional<uint32_t>
+ getKnownSubgroupSize(FunctionOpInterface func) {
IntegerAttr knownSubgroupSizeAttr =
- mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(op->getContext())
+ mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(func->getContext())
.getAttr(func);
if (!knownSubgroupSizeAttr)
return {};
+
return knownSubgroupSizeAttr.getInt();
}
+ static std::optional<uint32_t> getSubgroupSize(Operation *op) {
+ FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
+ if (!func)
+ return {};
+ auto knownSubgroupSize = getKnownSubgroupSize(func);
+ if (knownSubgroupSize)
+ return knownSubgroupSize;
+ return getIntelReqdSubGroupSize(func);
+ }
+
static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
llvm::APInt val;
Value width = op.getWidth();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 31a871e52023a8..467d15e5c2ef2b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -260,7 +260,7 @@ gpu.module @shuffles {
func.func @gpu_shuffles(%val0: i32, %id: i32,
%val1: i64, %mask: i32,
%val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) attributes {gpu.known_subgroup_size = 32 : i32} {
+ %val3: f64, %delta_down: i32) attributes { llvm.intel_reqd_sub_group_size = 32 : i32 } {
%width = arith.constant 32 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
// CHECK-SAME-DAG: no_unwind
More information about the Mlir-commits
mailing list