[Mlir-commits] [mlir] [mlir][gpu] Add `gpu.subgroup_uniform` op (PR #157743)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Sep 10 08:04:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Ivan Butygin (Hardcode84)
<details>
<summary>Changes</summary>
Introduce a dedicated op instead of using broadcast with `any_lane`, per the discussion in https://github.com/llvm/llvm-project/pull/152808
---
Full diff: https://github.com/llvm/llvm-project/pull/157743.diff
5 Files Affected:
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+33)
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+18-1)
- (modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+9)
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+12)
- (modified) mlir/test/Dialect/GPU/ops.mlir (+8)
``````````diff
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 987fc13e0508d..3e2fa6b43d5fd 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -3255,4 +3255,37 @@ def GPU_SubgroupBroadcastOp : GPU_Op<"subgroup_broadcast",
let hasVerifier = 1;
}
+def GPU_SubgroupUniformOp : GPU_Op<"subgroup_uniform",
+ [Pure, AllTypesMatch<["result", "src"]>,
+ DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>] #
+ ElementwiseMappable.traits>,
+ Arguments<(ins AnyType:$src)> {
+ let summary = "Assumes value is uniform across the lanes in subgroup";
+ let description = [{
+ The "subgroup_uniform" op assumes that the value is uniform across all lanes
+ in a subgroup. This means that all active lanes in the subgroup are expected
+ to have the same value.
+
+ This op can be used to inform the compiler that a value is uniform across
+ the subgroup, enabling optimizations. The result is poison if the value
+ is not actually uniform.
+
+ This op is functionally a no-op: no valid program should change its
+ semantics if this op is removed. Backends can choose to ignore it or do
+ some optimizations (e.g. put value into scalar registers).
+
+ This op can be freely speculated across structured control flow, as the
+ parent active mask is always a superset of the current mask; if you can hoist
+ the input calculation, you can hoist the operation itself as well.
+
+ Example:
+
+ ```mlir
+ %1 = gpu.subgroup_uniform %0 : f32
+ ```
+ }];
+ let results = (outs AnyType:$result);
+ let assemblyFormat = "$src attr-dict `:` type($result)";
+}
+
#endif // GPU_OPS
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 807d1f52ee69b..5377ce709497e 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -201,6 +201,22 @@ struct GPUSubgroupBroadcastOpToROCDL
}
};
+struct GPUSubgroupUniformOpToROCDL // Rewrites gpu.subgroup_uniform as rocdl.readfirstlane.
+ : public ConvertOpToLLVMPattern<gpu::SubgroupUniformOp> {
+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; // Reuse the base-class constructors.
+
+ LogicalResult
+ matchAndRewrite(gpu::SubgroupUniformOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Value src = adaptor.getSrc(); // Operand taken from the adaptor.
+ if (!isSupportedReadLaneType(src.getType())) // Bail out on types the readlane helper rejects.
+ return rewriter.notifyMatchFailure(op, "unsupported readlane type");
+
+ rewriter.replaceOpWithNewOp<ROCDL::ReadfirstlaneOp>(op, src.getType(), src); // Result type equals the operand type.
+ return success();
+ }
+};
+
struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;
@@ -494,7 +510,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);
patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
- GPUSubgroupBroadcastOpToROCDL>(converter);
+ GPUSubgroupBroadcastOpToROCDL, GPUSubgroupUniformOpToROCDL>(
+ converter);
patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
populateMathToROCDLConversionPatterns(converter, patterns);
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 43b02f16aa829..6022fa517421a 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2550,6 +2550,15 @@ LogicalResult gpu::SubgroupBroadcastOp::verify() {
}
}
+//===----------------------------------------------------------------------===//
+// GPU_SubgroupUniformOp
+//===----------------------------------------------------------------------===//
+
+void gpu::SubgroupUniformOp::inferResultRanges(
+ ArrayRef<ConstantIntRanges> argRanges, SetIntRangeFn setResultRange) {
+ setResultRange(getResult(), argRanges.front()); // The result reuses the first argument's range unchanged.
+}
+
//===----------------------------------------------------------------------===//
// GPU KernelMetadataAttr
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index c6261b37ef8f2..69fb45d9097b8 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -816,3 +816,15 @@ func.func @broadcast(%arg0 : index, %arg1 : i32) -> (index, index) {
func.return %0, %1 : index, index
}
}
+
+// -----
+
+gpu.module @test_module {
+// CHECK-LABEL: func @unifprm
+// CHECK-SAME: (%[[ARG:.*]]: i64)
+func.func @unifprm(%arg0 : index) -> index { // NOTE(review): name looks like a typo of "uniform"; rename together with the label above if confirmed.
+// CHECK: %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64
+ %0 = gpu.subgroup_uniform %arg0 : index // index operand; the expected output above uses i64.
+ func.return %0 : index
+}
+}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index e3e2474d917c8..d45d1cf52d91d 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -552,3 +552,11 @@ func.func @subgroup_broadcast(%arg0 : f32, %arg1 : i32) -> (f32, f32) {
%1 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : f32
func.return %0, %1 : f32, f32
}
+
+// CHECK-LABEL: func @subgroup_uniform
+// CHECK-SAME: (%[[ARG:.*]]: f32)
+func.func @subgroup_uniform(%arg0 : f32) -> f32 {
+ // CHECK: gpu.subgroup_uniform %[[ARG]] : f32
+ %0 = gpu.subgroup_uniform %arg0 : f32 // Expected output repeats the same op text on the function argument.
+ func.return %0 : f32
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/157743
More information about the Mlir-commits
mailing list