[Mlir-commits] [mlir] [mlir][gpu][spirv] Add patterns for gpu.shuffle up/down (PR #139105)
Hsiangkai Wang
llvmlistbot at llvm.org
Wed May 21 06:08:14 PDT 2025
https://github.com/Hsiangkai updated https://github.com/llvm/llvm-project/pull/139105
>From 632c8978a02abd93687bb95090503bdea6ebcaf1 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai.wang at arm.com>
Date: Fri, 9 May 2025 09:33:33 +0100
Subject: [PATCH 1/3] [mlir][gpu][spirv] Add patterns for gpu.shuffle up/down
Convert
gpu.shuffle down %val, %offset, %width
to
spirv.GroupNonUniformRotateKHR <Subgroup> %val, %offset, cluster_size(%width)
Convert
gpu.shuffle up %val, %offset, %width
to
%down_offset = arith.subi %width, %offset
spirv.GroupNonUniformRotateKHR <Subgroup> %val, %down_offset, cluster_size(%width)
---
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 15 ++++-
mlir/test/Conversion/GPUToSPIRV/shuffle.mlir | 57 +++++++++++++++++++
2 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 3cc64b82950b5..3d53c17eb6c07 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -450,8 +450,19 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
result = rewriter.create<spirv::GroupNonUniformShuffleOp>(
loc, scope, adaptor.getValue(), adaptor.getOffset());
break;
- default:
- return rewriter.notifyMatchFailure(shuffleOp, "unimplemented shuffle mode");
+ case gpu::ShuffleMode::DOWN:
+ result = rewriter.create<spirv::GroupNonUniformRotateKHROp>(
+ loc, scope, adaptor.getValue(), adaptor.getOffset(),
+ shuffleOp.getWidth());
+ break;
+ case gpu::ShuffleMode::UP: {
+ Value offsetForShuffleDown = rewriter.create<arith::SubIOp>(
+ loc, shuffleOp.getWidth(), adaptor.getOffset());
+ result = rewriter.create<spirv::GroupNonUniformRotateKHROp>(
+ loc, scope, adaptor.getValue(), offsetForShuffleDown,
+ shuffleOp.getWidth());
+ break;
+ }
}
rewriter.replaceOp(shuffleOp, {result, trueVal});
diff --git a/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir b/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir
index d3d8ec0dab40f..5d7d3c81577e3 100644
--- a/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir
@@ -72,3 +72,60 @@ gpu.module @kernels {
}
}
+
+// -----
+
+module attributes {
+ gpu.container_module,
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle, GroupNonUniformRotateKHR], []>,
+ #spirv.resource_limits<subgroup_size = 16>>
+} {
+
+gpu.module @kernels {
+ // CHECK-LABEL: spirv.func @shuffle_down()
+ gpu.func @shuffle_down() kernel
+ attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
+ %offset = arith.constant 4 : i32
+ %width = arith.constant 16 : i32
+ %val = arith.constant 42.0 : f32
+
+ // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
+ // CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
+ // CHECK: %{{.+}} = spirv.Constant true
+ // CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
+ %result, %valid = gpu.shuffle down %val, %offset, %width : f32
+ gpu.return
+ }
+}
+
+}
+
+// -----
+
+module attributes {
+ gpu.container_module,
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle, GroupNonUniformRotateKHR], []>,
+ #spirv.resource_limits<subgroup_size = 16>>
+} {
+
+gpu.module @kernels {
+ // CHECK-LABEL: spirv.func @shuffle_up()
+ gpu.func @shuffle_up() kernel
+ attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
+ %offset = arith.constant 4 : i32
+ %width = arith.constant 16 : i32
+ %val = arith.constant 42.0 : f32
+
+ // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
+ // CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
+ // CHECK: %{{.+}} = spirv.Constant true
+ // CHECK: %[[DOWN_OFFSET:.+]] = spirv.Constant 12 : i32
+ // CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[DOWN_OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
+ %result, %valid = gpu.shuffle up %val, %offset, %width : f32
+ gpu.return
+ }
+}
+
+}
>From 86c60375c30d5cba20a248615dbafa865dcd4489 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai.wang at arm.com>
Date: Wed, 21 May 2025 13:54:43 +0100
Subject: [PATCH 2/3] The width argument cannot exceed the subgroup limit.
---
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 3d53c17eb6c07..2e45d782ce0c7 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -430,10 +430,12 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
IntegerAttr widthAttr;
+ // The width argument specifies the number of lanes that participate in the
+ // shuffle. The width value should not exceed the subgroup limit.
if (!matchPattern(shuffleOp.getWidth(), m_Constant(&widthAttr)) ||
- widthAttr.getValue().getZExtValue() != subgroupSize)
+ widthAttr.getValue().getZExtValue() <= subgroupSize)
return rewriter.notifyMatchFailure(
- shuffleOp, "shuffle width and target subgroup size mismatch");
+ shuffleOp, "shuffle width is larger than target subgroup size");
Location loc = shuffleOp.getLoc();
Value trueVal = spirv::ConstantOp::getOne(rewriter.getI1Type(),
>From 62e777e26cb72a255f9ef289cc689dcf935748e5 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai.wang at arm.com>
Date: Wed, 21 May 2025 14:07:55 +0100
Subject: [PATCH 3/3] fix typo
---
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 2e45d782ce0c7..c8dc1f41c7146 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -433,7 +433,7 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
// The width argument specifies the number of lanes that participate in the
// shuffle. The width value should not exceed the subgroup limit.
if (!matchPattern(shuffleOp.getWidth(), m_Constant(&widthAttr)) ||
- widthAttr.getValue().getZExtValue() <= subgroupSize)
+ widthAttr.getValue().getZExtValue() > subgroupSize)
return rewriter.notifyMatchFailure(
shuffleOp, "shuffle width is larger than target subgroup size");
More information about the Mlir-commits
mailing list