[Mlir-commits] [mlir] d835071 - [mlir] GPUToROCDL: lower `gpu.subgroup_id` to the intrinsic where possible (#179422)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Feb 3 13:53:12 PST 2026
Author: Ivan Butygin
Date: 2026-02-04T00:53:07+03:00
New Revision: d8350712b300bcd4d05240b8edec40cc96e6d588
URL: https://github.com/llvm/llvm-project/commit/d8350712b300bcd4d05240b8edec40cc96e6d588
DIFF: https://github.com/llvm/llvm-project/commit/d8350712b300bcd4d05240b8edec40cc96e6d588.diff
LOG: [mlir] GPUToROCDL: lower `gpu.subgroup_id` to the intrinsic where possible (#179422)
Lower `gpu.subgroup_id` to the `wave.id` intrinsic on gfx12+; on older
chipsets, lower it to `linearized_thread_id / subgroup_size`.
Added:
mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-subgroup-id.mlir
Modified:
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 69a83468cfa84..096554d53e031 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -170,6 +170,67 @@ struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
const amdgpu::Chipset chipset;
};
+struct GPUSubgroupIdOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupIdOp> { // Lowers gpu.subgroup_id to ROCDL/LLVM ops; the strategy depends on the chipset.
+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+ GPUSubgroupIdOpToROCDL(const LLVMTypeConverter &converter,
+ amdgpu::Chipset chipset)
+ : ConvertOpToLLVMPattern<gpu::SubgroupIdOp>(converter), chipset(chipset) {
+ }
+
+ LogicalResult
+ matchAndRewrite(gpu::SubgroupIdOp op, gpu::SubgroupIdOp::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+ auto int32Type = rewriter.getI32Type();
+
+ Value subgroupId;
+ if (chipset.majorVersion >= 12) {
+ // For gfx12+ (majorVersion >= 12), emit the wave.id intrinsic directly; if the op carries an upper bound, forward it as a 32-bit range attribute starting at 0.
+ LLVM::ConstantRangeAttr bounds;
+ if (auto upperBoundAttr = op.getUpperBoundAttr())
+ bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
+ /*bitWidth=*/32, /*lower=*/0,
+ /*upper=*/upperBoundAttr.getInt());
+ subgroupId = ROCDL::WaveId::create(rewriter, loc, int32Type, bounds);
+ } else {
+ // For older architectures (majorVersion < 12), compute the id arithmetically (the op's upper bound is not consulted on this path):
+ // subgroup_id = linearized_thread_id / subgroup_size
+ // where linearized_thread_id = tid.x + dim.x * (tid.y + dim.y * tid.z)
+ Value tidX = ROCDL::ThreadIdXOp::create(rewriter, loc, int32Type);
+ Value tidY = ROCDL::ThreadIdYOp::create(rewriter, loc, int32Type);
+ Value tidZ = ROCDL::ThreadIdZOp::create(rewriter, loc, int32Type);
+ Value dimX = ROCDL::BlockDimXOp::create(rewriter, loc, int32Type);
+ Value dimY = ROCDL::BlockDimYOp::create(rewriter, loc, int32Type);
+
+ // linearized = tid.x + dim.x * (tid.y + dim.y * tid.z), built innermost term first.
+ // Thread IDs and dimensions are non-negative and small, so every mul/add below carries nuw+nsw.
+ auto flags =
+ LLVM::IntegerOverflowFlags::nsw | LLVM::IntegerOverflowFlags::nuw;
+ Value dimYxTidZ =
+ LLVM::MulOp::create(rewriter, loc, int32Type, dimY, tidZ, flags);
+ Value tidYPlusDimYxTidZ =
+ LLVM::AddOp::create(rewriter, loc, int32Type, tidY, dimYxTidZ, flags);
+ Value dimXxInner = LLVM::MulOp::create(rewriter, loc, int32Type, dimX,
+ tidYPlusDimYxTidZ, flags);
+ Value linearized = LLVM::AddOp::create(rewriter, loc, int32Type, tidX,
+ dimXxInner, flags);
+
+ Value subgroupSize =
+ ROCDL::WavefrontSizeOp::create(rewriter, loc, int32Type);
+ subgroupId = LLVM::UDivOp::create(rewriter, loc, int32Type, linearized,
+ subgroupSize);
+ }
+
+ subgroupId =
+ truncOrExtToLLVMType(rewriter, loc, subgroupId, *getTypeConverter()); // NOTE(review): presumably resizes the i32 id to the converted result type -- confirm against the helper.
+ rewriter.replaceOp(op, subgroupId);
+ return success();
+ }
+
+ const amdgpu::Chipset chipset; // Target chipset; its majorVersion selects between the two lowerings above.
+};
+
static bool isSupportedReadLaneType(Type type) {
// https://llvm.org/docs/AMDGPUUsage.html#llvm-ir-intrinsics
if (isa<Float16Type, BFloat16Type, Float32Type, Float64Type,
@@ -586,8 +647,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
GPUSubgroupBroadcastOpToROCDL>(converter);
- patterns.add<GPUSubgroupSizeOpToROCDL, GPUBarrierOpLowering>(converter,
- chipset);
+ patterns.add<GPUSubgroupIdOpToROCDL, GPUSubgroupSizeOpToROCDL,
+ GPUBarrierOpLowering>(converter, chipset);
populateMathToROCDLConversionPatterns(converter, patterns, chipset);
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-subgroup-id.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-subgroup-id.mlir
new file mode 100644
index 0000000000000..030eb0e5eb181
--- /dev/null
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-subgroup-id.mlir
@@ -0,0 +1,40 @@
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx942' | FileCheck %s --check-prefixes=CHECK,GFX9
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx1201' | FileCheck %s --check-prefixes=CHECK,GFX12
+
+gpu.module @test_module {
+// CHECK-LABEL: func @subgroup_id()
+func.func @subgroup_id() -> index {
+ // GFX12: rocdl.wave.id : i32
+ // GFX12: llvm.sext %{{.*}} : i32 to i64
+
+ // GFX9-DAG: rocdl.workitem.id.x : i32
+ // GFX9-DAG: rocdl.workitem.id.y : i32
+ // GFX9-DAG: rocdl.workitem.id.z : i32
+ // GFX9-DAG: rocdl.workgroup.dim.x : i32
+ // GFX9-DAG: rocdl.workgroup.dim.y : i32
+ // GFX9-DAG: llvm.mul %{{.*}}, %{{.*}} overflow<nsw, nuw>
+ // GFX9-DAG: llvm.add %{{.*}}, %{{.*}} overflow<nsw, nuw>
+ // GFX9: rocdl.wavefrontsize : i32
+ // GFX9: llvm.udiv
+ // GFX9: llvm.sext %{{.*}} : i32 to i64
+ %subgroupId = gpu.subgroup_id : index
+ func.return %subgroupId : index
+}
+
+// CHECK-LABEL: func @subgroup_id_with_upper_bound()
+func.func @subgroup_id_with_upper_bound() -> index {
+ // GFX12: rocdl.wave.id range <i32, 0, 4> : i32
+ // GFX12: llvm.sext %{{.*}} : i32 to i64
+
+ // GFX9-DAG: rocdl.workitem.id.x : i32
+ // GFX9-DAG: rocdl.workitem.id.y : i32
+ // GFX9-DAG: rocdl.workitem.id.z : i32
+ // GFX9-DAG: rocdl.workgroup.dim.x : i32
+ // GFX9-DAG: rocdl.workgroup.dim.y : i32
+ // GFX9: rocdl.wavefrontsize : i32
+ // GFX9: llvm.udiv
+ // GFX9: llvm.sext %{{.*}} : i32 to i64
+ %subgroupId = gpu.subgroup_id upper_bound 4 : index
+ func.return %subgroupId : index
+}
+}
More information about the Mlir-commits
mailing list