[Mlir-commits] [mlir] [MLIR][ROCDL] Add conversion for gpu.subgroup_id to ROCDL (PR #136405)

Wed Apr 23 12:05:46 PDT 2025

================
@@ -190,6 +221,62 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
+struct GPUSubgroupIdOpToROCDL final
+    : ConvertOpToLLVMPattern<gpu::SubgroupIdOp> {
+
+  GPUSubgroupIdOpToROCDL(const LLVMTypeConverter &converter,
+                         const mlir::amdgpu::Chipset &chipset,
+                         std::optional<int64_t> subgroupSize = std::nullopt)
+      : ConvertOpToLLVMPattern(converter), chipset(chipset),
+        subgroupSize(subgroupSize) {}
+
+  const mlir::amdgpu::Chipset chipset;
+  const std::optional<int64_t> subgroupSize;
+
+  int64_t getSubgroupSize() const {
+    if (subgroupSize)
+      return *subgroupSize;
+    return querySubgroupSize(chipset);
+  }
+
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupIdOp op, gpu::SubgroupIdOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
+    auto loc = op.getLoc();
+    LLVM::IntegerOverflowFlags flags =
+        LLVM::IntegerOverflowFlags::nsw | LLVM::IntegerOverflowFlags::nuw;
+    // linearized thread ids are divided into consecutive subgroups.
+    // Where thread id is calculated as:
+    // thread_id = w_id.x + w_dim.x * (w_id.y + (w_dim.y * w_id.z))
+    // And the subgroup id of the thread is calculated as:
+    // subgroup_id = thread_id / subgroup_size
----------------
kuhar wrote:

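nit: Let's follow the LLVM commenting guidelines here (https://llvm.org/docs/CodingStandards.html#commenting): comments should be written as English prose, with proper capitalization and punctuation.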

https://github.com/llvm/llvm-project/pull/136405