[Mlir-commits] [mlir] GPU known subgroup size (PR #112732)
llvmlistbot at llvm.org
Thu Oct 17 08:37:24 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-mlir
Author: Finlay (FMarno)
---
Full diff: https://github.com/llvm/llvm-project/pull/112732.diff
4 Files Affected:
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUBase.td (+2-1)
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+8-1)
- (modified) mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp (+39-8)
- (modified) mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir (+4-6)
``````````diff
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 860f8933672038..fb9df5067a31b0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -64,7 +64,8 @@ def GPU_Dialect : Dialect {
let discardableAttrs = (ins
"::mlir::DenseI32ArrayAttr":$known_block_size,
- "::mlir::DenseI32ArrayAttr":$known_grid_size
+ "::mlir::DenseI32ArrayAttr":$known_grid_size,
+ "::mlir::IntegerAttr" : $known_subgroup_size
);
let dependentDialects = ["arith::ArithDialect"];
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6098eb34d04d52..d4779d1b47a42d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -388,6 +388,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
by using `gpu.known_block_size` or `gpu.known_grid_size`, but this carries
the risk that they will be discarded.
+ A function may optionally be annotated, via the `known_subgroup_size`
+ attribute, with the subgroup size that will be used when it is launched.
+ If set, this attribute is a single positive integer (i.e. > 0). Launching
+ a function carrying this annotation with a subgroup size other than the
+ one specified is undefined behaviour.
+
Syntax:
```
@@ -431,7 +437,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
GPU_OptionalDimSizeHintAttr:$known_block_size,
- GPU_OptionalDimSizeHintAttr:$known_grid_size);
+ GPU_OptionalDimSizeHintAttr:$known_grid_size,
+ OptionalAttr<I32Attr>:$known_subgroup_size);
let regions = (region AnyRegion:$body);
let skipDefaultBuilders = 1;
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a79..2a5e5de2357641 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -271,24 +271,55 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
typeMangling.value());
}
+ static std::optional<uint32_t>
+ getIntelReqdSubGroupSize(FunctionOpInterface func) {
+ constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize =
+ "llvm.intel_reqd_sub_group_size";
+ IntegerAttr reqdSubgroupSizeAttr = llvm::cast_if_present<IntegerAttr>(
+ func->getAttr(discardableIntelReqdSubgroupSize));
+ if (!reqdSubgroupSizeAttr)
+ return {};
+
+ return reqdSubgroupSizeAttr.getInt();
+ }
+
/// Get the subgroup size from the target or return a default.
- static int getSubgroupSize(Operation *op) {
- return spirv::lookupTargetEnvOrDefault(op)
- .getResourceLimits()
- .getSubgroupSize();
+ static std::optional<uint32_t>
+ getKnownSubgroupSize(FunctionOpInterface func) {
+ IntegerAttr knownSubgroupSizeAttr =
+ mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(func->getContext())
+ .getAttr(func);
+ if (!knownSubgroupSizeAttr)
+ return {};
+
+ return knownSubgroupSizeAttr.getInt();
}
- static bool hasValidWidth(gpu::ShuffleOp op) {
+ static std::optional<uint32_t> getSubgroupSize(Operation *op) {
+ FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
+ if (!func)
+ return {};
+ auto knownSubgroupSize = getKnownSubgroupSize(func);
+ if (knownSubgroupSize)
+ return knownSubgroupSize;
+ return getIntelReqdSubGroupSize(func);
+ }
+
+ static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
llvm::APInt val;
Value width = op.getWidth();
- return matchPattern(width, m_ConstantInt(&val)) &&
- val == getSubgroupSize(op);
+ return matchPattern(width, m_ConstantInt(&val)) && val == subgroupSize;
}
LogicalResult
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
- if (!hasValidWidth(op))
+ auto maybeSubgroupSize = getSubgroupSize(op);
+ if (!maybeSubgroupSize)
+ return rewriter.notifyMatchFailure(
+ op, "subgroup size not specified. Should be specified with "
+ "known_subgroup_size.");
+ if (!hasValidWidth(op, maybeSubgroupSize.value()))
return rewriter.notifyMatchFailure(
op, "shuffle width and subgroup size mismatch");
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..467d15e5c2ef2b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -260,7 +260,7 @@ gpu.module @shuffles {
func.func @gpu_shuffles(%val0: i32, %id: i32,
%val1: i64, %mask: i32,
%val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) {
+ %val3: f64, %delta_down: i32) attributes { llvm.intel_reqd_sub_group_size = 32 : i32 } {
%width = arith.constant 32 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
// CHECK-SAME-DAG: no_unwind
@@ -302,9 +302,7 @@ gpu.module @shuffles {
// Check `gpu.shuffle` conversion with explicit subgroup size.
-gpu.module @shuffles attributes {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
-} {
+gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
@@ -352,7 +350,7 @@ gpu.module @shuffles attributes {
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
- // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
func.func @gpu_shuffles(%i8_val: i8,
%i16_val: i16,
%i32_val: i32,
@@ -360,7 +358,7 @@ gpu.module @shuffles attributes {
%f16_val: f16,
%f32_val: f32,
%f64_val: f64,
- %offset: i32) {
+ %offset: i32) attributes {gpu.known_subgroup_size = 16 : i32} {
%width = arith.constant 16 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
``````````
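To illustrate the intended usage, here is a minimal sketch based on the test changes above; the module and function names are illustrative, not taken from the patch. The kernel records the subgroup size it will be launched with via `gpu.known_subgroup_size`, and the `gpu.shuffle` width matches that value so the GPUToLLVMSPV pattern can lower it:

```mlir
gpu.module @subgroup_size_example {
  // The launch-time subgroup size is recorded on the function. The shuffle
  // width must equal it for the conversion pattern to apply.
  func.func @shuffle_with_known_size(%val: f32, %offset: i32)
      attributes {gpu.known_subgroup_size = 16 : i32} {
    %width = arith.constant 16 : i32
    %shuffled, %valid = gpu.shuffle xor %val, %offset, %width : f32
    return
  }
}
```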
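The conversion's `getSubgroupSize` helper first looks for `gpu.known_subgroup_size` and, if that is absent, falls back to the discardable `llvm.intel_reqd_sub_group_size` attribute. A sketch of the fallback case (again with made-up names), mirroring the first test update above:

```mlir
gpu.module @intel_reqd_example {
  // No gpu.known_subgroup_size here, so the pattern reads the discardable
  // llvm.intel_reqd_sub_group_size attribute instead.
  func.func @shuffle_with_intel_attr(%val: i32, %offset: i32)
      attributes {llvm.intel_reqd_sub_group_size = 32 : i32} {
    %width = arith.constant 32 : i32
    %shuffled, %valid = gpu.shuffle idx %val, %offset, %width : i32
    return
  }
}
```

If neither attribute is present, the pattern now reports a match failure instead of consulting the SPIR-V target environment's resource limits.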
https://github.com/llvm/llvm-project/pull/112732