[Mlir-commits] [mlir] [mlir][gpu] Make offset and width in gpu.rotate as attributes (PR #150901)
Hsiangkai Wang
llvmlistbot at llvm.org
Mon Jul 28 01:41:50 PDT 2025
https://github.com/Hsiangkai created https://github.com/llvm/llvm-project/pull/150901
`offset` and `width` must be constants and there are constraints on their values. Update the operation definition to use attributes instead of operands.
>From c272d8fff3b0018d6cca5712c4375cac3860efd6 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai.wang at arm.com>
Date: Fri, 25 Jul 2025 21:01:54 +0100
Subject: [PATCH] [mlir][gpu] Make offset and width in gpu.rotate as attributes
`offset` and `width` must be constants and there are constraints on
their values. Update the operation definition to use attributes instead
of operands.
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 13 ++----
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 18 ++++-----
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 35 +++-------------
mlir/test/Conversion/GPUToSPIRV/rotate.mlir | 38 +++---------------
mlir/test/Dialect/GPU/invalid.mlir | 40 +++----------------
mlir/test/Dialect/GPU/ops.mlir | 5 +--
6 files changed, 30 insertions(+), 119 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 1dbaf5db7b618..170616f03be2c 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1368,12 +1368,12 @@ def GPU_ShuffleOp : GPU_Op<
def GPU_RotateOp : GPU_Op<
"rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>,
- Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width)>,
+ Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32Attr:$offset, I32Attr:$width)>,
Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> {
let summary = "Rotate values within a subgroup.";
let description = [{
The "rotate" op moves values across lanes in a subgroup (a.k.a., local
- invocations) within the same subgroup. The `width` argument specifies the
+ invocations) within the same subgroup. The `width` attribute specifies the
number of lanes that participate in the rotation, and must be uniform across
all participating lanes. Further, the first `width` lanes of the subgroup
must be active.
@@ -1394,9 +1394,7 @@ def GPU_RotateOp : GPU_Op<
example:
```mlir
- %offset = arith.constant 1 : i32
- %width = arith.constant 16 : i32
- %1, %2 = gpu.rotate %0, %offset, %width : f32
+ %1, %2 = gpu.rotate %0, 1, 16 : f32
```
For lane `k`, returns the value from lane `(k + cst1) % width`.
@@ -1406,11 +1404,6 @@ def GPU_RotateOp : GPU_Op<
$value `,` $offset `,` $width attr-dict `:` type($value)
}];
- let builders = [
- // Helper function that creates a rotate with constant offset/width.
- OpBuilder<(ins "Value":$value, "int32_t":$offset, "int32_t":$width)>
- ];
-
let hasVerifier = 1;
}
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 75e65632b0cb7..a156fdd3d383e 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -507,25 +507,25 @@ LogicalResult GPURotateConversion::matchAndRewrite(
getTypeConverter<SPIRVTypeConverter>()->getTargetEnv();
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
- IntegerAttr widthAttr;
- if (!matchPattern(rotateOp.getWidth(), m_Constant(&widthAttr)) ||
- widthAttr.getValue().getZExtValue() > subgroupSize)
+ unsigned width = rotateOp.getWidth();
+ if (width > subgroupSize)
return rewriter.notifyMatchFailure(
- rotateOp,
- "rotate width is not a constant or larger than target subgroup size");
+ rotateOp, "rotate width is larger than target subgroup size");
Location loc = rotateOp.getLoc();
auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
+ Value offsetVal = arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr());
+ Value widthVal = arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr());
Value rotateResult = spirv::GroupNonUniformRotateKHROp::create(
- rewriter, loc, scope, adaptor.getValue(), adaptor.getOffset(),
- adaptor.getWidth());
+ rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal);
Value validVal;
- if (widthAttr.getValue().getZExtValue() == subgroupSize) {
+ if (width == subgroupSize) {
validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), loc, rewriter);
} else {
+ IntegerAttr widthAttr = adaptor.getWidthAttr();
Value laneId = gpu::LaneIdOp::create(rewriter, loc, widthAttr);
validVal = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult,
- laneId, adaptor.getWidth());
+ laneId, widthVal);
}
rewriter.replaceOp(rotateOp, {rotateResult, validVal});
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index d186a480c0ce5..abd2ba5680471 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1395,40 +1395,15 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
// RotateOp
//===----------------------------------------------------------------------===//
-void RotateOp::build(OpBuilder &builder, OperationState &result, Value value,
- int32_t offset, int32_t width) {
- build(builder, result, value,
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(offset)),
- arith::ConstantOp::create(builder, result.location,
- builder.getI32IntegerAttr(width)));
-}
-
LogicalResult RotateOp::verify() {
- auto offsetConstOp = getOffset().getDefiningOp<arith::ConstantOp>();
- if (!offsetConstOp)
- return emitOpError() << "offset is not a constant value";
-
- auto offsetIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(offsetConstOp.getValue());
-
- auto widthConstOp = getWidth().getDefiningOp<arith::ConstantOp>();
- if (!widthConstOp)
- return emitOpError() << "width is not a constant value";
-
- auto widthIntAttr =
- llvm::dyn_cast<mlir::IntegerAttr>(widthConstOp.getValue());
-
- llvm::APInt offsetValue = offsetIntAttr.getValue();
- llvm::APInt widthValue = widthIntAttr.getValue();
+ uint32_t offset = getOffset();
+ uint32_t width = getWidth();
- if (!widthValue.isPowerOf2())
+ if (!llvm::isPowerOf2_32(width))
return emitOpError() << "width must be a power of two";
- if (offsetValue.sge(widthValue) || offsetValue.slt(0)) {
- int64_t widthValueInt = widthValue.getSExtValue();
- return emitOpError() << "offset must be in the range [0, " << widthValueInt
- << ")";
+ if (offset >= width) {
+ return emitOpError() << "offset must be in the range [0, " << width << ")";
}
return success();
diff --git a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
index b96dd37219b46..c71d22017698a 100644
--- a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir
@@ -10,16 +10,14 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate()
gpu.func @rotate() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %{{.+}} = spirv.Constant true
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 16 : f32
gpu.return
}
}
@@ -38,18 +36,16 @@ gpu.module @kernels {
// CHECK-LABEL: spirv.func @rotate_width_less_than_subgroup_size()
gpu.func @rotate_width_less_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 8 : i32
%val = arith.constant 42.0 : f32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32
// CHECK: %[[WIDTH:.+]] = spirv.Constant 8 : i32
- // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
// CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR <Subgroup> %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32
// CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__
// CHECK: %[[INVOCATION_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]]
// CHECK: %{{.+}} = spirv.ULessThan %[[INVOCATION_ID]], %[[WIDTH]]
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 8 : f32
gpu.return
}
}
@@ -67,34 +63,10 @@ module attributes {
gpu.module @kernels {
gpu.func @rotate_with_bigger_than_subgroup_size() kernel
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %width = arith.constant 32 : i32
%val = arith.constant 42.0 : f32
// expected-error @+1 {{failed to legalize operation 'gpu.rotate'}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
- gpu.return
- }
-}
-
-}
-
-// -----
-
-module attributes {
- gpu.container_module,
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformRotateKHR], []>,
- #spirv.resource_limits<subgroup_size = 16>>
-} {
-
-gpu.module @kernels {
- gpu.func @rotate_non_const_width(%width: i32) kernel
- attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
- %offset = arith.constant 4 : i32
- %val = arith.constant 42.0 : f32
-
- // expected-error @+1 {{'gpu.rotate' op width is not a constant value}}
- %result, %valid = gpu.rotate %val, %offset, %width : f32
+ %result, %valid = gpu.rotate %val, 4, 32 : f32
gpu.return
}
}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 162ff0662e91e..0d2dc6fa34b02 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -479,20 +479,16 @@ func.func @shuffle_unsupported_type_vec(%arg0 : vector<[4]xf32>, %arg1 : i32, %a
// -----
func.func @rotate_mismatching_type(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error at +1 {{op failed to verify that all of {value, rotateResult} have same type}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (i32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 16 : i32 } : (f32) -> (i32, i1)
return
}
// -----
func.func @rotate_unsupported_type(%arg0 : index) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 16 : i32
// expected-error at +1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'index'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : index
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : index
return
}
@@ -502,55 +498,31 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) {
%offset = arith.constant 4 : i32
%width = arith.constant 16 : i32
// expected-error at +1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'vector<[4]xf32>'}}
- %rotate, %valid = gpu.rotate %arg0, %offset, %width : vector<[4]xf32>
+ %rotate, %valid = gpu.rotate %arg0, 4, 16 : vector<[4]xf32>
return
}
// -----
func.func @rotate_unsupported_width(%arg0 : f32) {
- %offset = arith.constant 4 : i32
- %width = arith.constant 15 : i32
// expected-error at +1 {{op width must be a power of two}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset(%arg0 : f32) {
- %offset = arith.constant 16 : i32
- %width = arith.constant 16 : i32
// expected-error at +1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = 16 : i32, width = 16 : i32 }: (f32) -> (f32, i1)
return
}
// -----
func.func @rotate_unsupported_offset_minus(%arg0 : f32) {
- %offset = arith.constant -1 : i32
- %width = arith.constant 16 : i32
// expected-error at +1 {{op offset must be in the range [0, 16)}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_offset_non_constant(%arg0 : f32, %offset : i32) {
- %width = arith.constant 16 : i32
- // expected-error at +1 {{op offset is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
- return
-}
-
-// -----
-
-func.func @rotate_width_non_constant(%arg0 : f32, %width : i32) {
- %offset = arith.constant 0 : i32
- // expected-error at +1 {{op width is not a constant value}}
- %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1)
+ %rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1)
return
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 2aef80f73feb3..ee1fdfa4d02f0 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -140,9 +140,8 @@ module attributes {gpu.container_module} {
// CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32
- // CHECK: gpu.rotate %{{.*}}, %{{.*}}, %{{.*}} : f32
- %rotate_width = arith.constant 16 : i32
- %rotate, %pred4 = gpu.rotate %arg0, %offset, %rotate_width : f32
+ // CHECK: gpu.rotate %{{.*}}, 3, 16 : f32
+ %rotate, %pred4 = gpu.rotate %arg0, 3, 16 : f32
"gpu.barrier"() : () -> ()
More information about the Mlir-commits
mailing list