[Mlir-commits] [mlir] [mlir][spirv][gpu] Add lowering for gpu.subgroup_broadcast (PR #187947)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Mar 22 11:06:24 PDT 2026
https://github.com/hankluo6 created https://github.com/llvm/llvm-project/pull/187947
Fixes #157940
Add lowering for `gpu.subgroup_broadcast` and `gpu.subgroup_broadcast_first` to `spirv.GroupNonUniformBroadcast` and `spirv.GroupNonUniformBroadcastFirst`.
>From 9bb57a047a2b730e7f80b272bcfe54d3e48296b9 Mon Sep 17 00:00:00 2001
From: hankluo6 <hankluo6 at gmail.com>
Date: Fri, 20 Mar 2026 22:31:33 -0700
Subject: [PATCH] Add lowering for gpu.subgroup_broadcast
---
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 39 +++++++++++++++
.../test/Conversion/GPUToSPIRV/broadcast.mlir | 47 +++++++++++++++++++
2 files changed, 86 insertions(+)
create mode 100644 mlir/test/Conversion/GPUToSPIRV/broadcast.mlir
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index c33a903d03393..d5269511d61a2 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -130,6 +130,18 @@ class GPURotateConversion final : public OpConversionPattern<gpu::RotateOp> {
ConversionPatternRewriter &rewriter) const override;
};
+/// Pattern to convert a gpu.subgroup_broadcast op into a
+/// spirv.GroupNonUniformBroadcast op.
+class GPUSubgroupBroadcastConversion final
+ : public OpConversionPattern<gpu::SubgroupBroadcastOp> {
+public:
+ using Base::Base;
+
+ LogicalResult
+ matchAndRewrite(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override;
+};
+
class GPUPrintfConversion final : public OpConversionPattern<gpu::PrintfOp> {
public:
using Base::Base;
@@ -542,6 +554,32 @@ LogicalResult GPURotateConversion::matchAndRewrite(
return success();
}
+//===----------------------------------------------------------------------===//
+// Subgroup broadcast
+//===----------------------------------------------------------------------===//
+
+LogicalResult GPUSubgroupBroadcastConversion::matchAndRewrite(
+ gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const {
+ Location loc = op.getLoc();
+ auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
+ Value result;
+
+ switch (op.getBroadcastType()) {
+ case gpu::BroadcastType::specific_lane:
+ result = spirv::GroupNonUniformBroadcastOp::create(
+ rewriter, loc, scope, adaptor.getSrc(), adaptor.getLane());
+ break;
+ case gpu::BroadcastType::first_active_lane:
+ result = spirv::GroupNonUniformBroadcastFirstOp::create(
+ rewriter, loc, scope, adaptor.getSrc());
+ break;
+ }
+
+ rewriter.replaceOp(op, result);
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// Group ops
//===----------------------------------------------------------------------===//
@@ -832,6 +870,7 @@ void mlir::populateGPUToSPIRVPatterns(const SPIRVTypeConverter &typeConverter,
patterns.add<
GPUBarrierConversion, GPUFuncOpConversion, GPUModuleConversion,
GPUReturnOpConversion, GPUShuffleConversion, GPURotateConversion,
+ GPUSubgroupBroadcastConversion,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
diff --git a/mlir/test/Conversion/GPUToSPIRV/broadcast.mlir b/mlir/test/Conversion/GPUToSPIRV/broadcast.mlir
new file mode 100644
index 0000000000000..41aa68b3f76f2
--- /dev/null
+++ b/mlir/test/Conversion/GPUToSPIRV/broadcast.mlir
@@ -0,0 +1,47 @@
+// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s
+
+// -----
+
+module attributes {
+ gpu.container_module,
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, GroupNonUniformBallot], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+ // CHECK-LABEL: spirv.func @broadcast_specific_lane()
+ gpu.func @broadcast_specific_lane() kernel
+ attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
+ %lane = arith.constant 0 : i32
+ %val = arith.constant 42.0 : f32
+
+ // CHECK: %[[LANE:.+]] = spirv.Constant 0 : i32
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
+ // CHECK: %{{.+}} = spirv.GroupNonUniformBroadcast <Subgroup> %[[VAL]], %[[LANE]] : f32, i32
+ %result = gpu.subgroup_broadcast %val, specific_lane %lane : f32
+ gpu.return
+ }
+}
+
+}
+
+// -----
+
+module attributes {
+ gpu.container_module,
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, GroupNonUniformBallot], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+ // CHECK-LABEL: spirv.func @broadcast_first_active_lane()
+ gpu.func @broadcast_first_active_lane() kernel
+ attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
+ %val = arith.constant 42.0 : f32
+
+ // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32
+ // CHECK: %{{.+}} = spirv.GroupNonUniformBroadcastFirst <Subgroup> %[[VAL]] : f32
+ %result = gpu.subgroup_broadcast %val, first_active_lane : f32
+ gpu.return
+ }
+}
+
+}
More information about the Mlir-commits
mailing list