[Mlir-commits] [mlir] [mlir][ROCDL] Add subgroup_reduce lowering support for gfx10+ devices (PR #135983)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Apr 16 09:40:32 PDT 2025
https://github.com/Muzammiluddin-Syed-ECE updated https://github.com/llvm/llvm-project/pull/135983
>From 44df8d58a5ec9884dc4003d8e1a79a84397c2df8 Mon Sep 17 00:00:00 2001
From: Muzammiluddin Syed <muzasyed at amd.com>
Date: Wed, 16 Apr 2025 01:07:47 -0500
Subject: [PATCH] Adding permlanex16 and rowshare to rocdl mlir dialect
Signed-off-by: Muzammiluddin Syed <muzasyed at amd.com>
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 4 +++-
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 16 ++++++++++++++++
.../Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 6 ++++++
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 14 ++++++++++++++
mlir/test/Conversion/AMDGPUToROCDL/dpp.mlir | 8 ++++++++
5 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 108d7237ff703..17c1162170073 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -524,7 +524,8 @@ def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm",
I32EnumAttrCase<"row_mirror", 8>,
I32EnumAttrCase<"row_half_mirror", 9>,
I32EnumAttrCase<"row_bcast_15", 10>,
- I32EnumAttrCase<"row_bcast_31", 11>
+ I32EnumAttrCase<"row_bcast_31", 11>,
+ I32EnumAttrCase<"row_share", 12>
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::mlir::amdgpu";
@@ -557,6 +558,7 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result",
- Reverse within a half-row (`row_half_mirror`)
- Broadcast the 15th lane of each row to the next row (`row_bcast`)
- Broadcast lane 31 to rows 2 and 3 (`row_bcast`)
+ - Broadcast a selected lane [0-15] of each row to all lanes of that row (`row_share`)
}];
let results = (outs AnyType:$result);
let assemblyFormat = [{
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 900155c274b4d..186a4f53f93cb 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -668,6 +668,22 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
}];
}
+// PermLaneX16 intrinsic operation
+def ROCDL_PermlaneX16Op : ROCDL_IntrOp<"permlanex16", [], [0],
+ [AllTypesMatch<["res", "old", "src0"]>, AllTypesMatch<["src1", "src2"]>], 1, 0, 0,
+ [4, 5], ["fi", "boundControl"]>,
+ Arguments<(ins LLVM_Type:$old, LLVM_Type:$src0, LLVM_Type:$src1, LLVM_Type:$src2,
+ I1Attr:$fi, I1Attr:$boundControl)> {
+ let results = (outs LLVM_Type:$res);
+ let assemblyFormat = [{
+ attr-dict $old `,` $src0 `,` $src1 `,` $src2 `,` $fi `,` $boundControl `:` type($src0) `,` type($src1)
+ }];
+ let description = [{
+ Performs a `permlanex16` operation with the given operands. Lane selection
+ is given by $src1 and $src2; $fi and $boundControl are boolean flag attributes.
+ }];
+}
+
def ROCDL_V2I16Type : FixedVectorOfLengthAndType<[2], [I16]>,
BuildableType<"::mlir::VectorType::get("
"{2},$_builder.getI16Type())">;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 5f697bdeef566..4d343c8f3200c 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -1293,6 +1293,7 @@ struct AMDGPUDPPLowering : public ConvertOpToLLVMPattern<DPPOp> {
ROW_HALF_MIRROR = 0x141,
BCAST15 = 0x142,
BCAST31 = 0x143,
+ ROW_SHARE0 = 0x150
};
auto kind = DppOp.getKind();
@@ -1350,6 +1351,11 @@ struct AMDGPUDPPLowering : public ConvertOpToLLVMPattern<DPPOp> {
case DPPPerm::row_bcast_31:
DppCtrl = DppCtrl::BCAST31;
break;
+ case DPPPerm::row_share:
+ if (auto intAttr = cast<IntegerAttr>(*permArgument)) {
+ DppCtrl = intAttr.getInt() + DppCtrl::ROW_SHARE0;
+ }
+ break;
}
// Check for row_mask, bank_mask, bound_ctrl if they exist and create
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 549a4376a4a04..af4438f028542 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -468,6 +468,20 @@ LogicalResult DPPOp::verify() {
}
break;
}
+
+ case DPPPerm::row_share: {
+ if (!permArgument) {
+ return emitOpError("Attribute '" + Twine(stringifyDPPPerm(kind)) +
+ "' value not specified");
+ }
+ if (auto intAttr = dyn_cast<IntegerAttr>(permArgument)) {
+ uint32_t attrValue = intAttr.getInt();
+ if (attrValue < 0 || attrValue > 15) {
+ return emitOpError(
+ "Attribute value for 'row_share' must be between 0 and 15");
+ }
+ }
+ } break;
}
return success();
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/dpp.mlir b/mlir/test/Conversion/AMDGPUToROCDL/dpp.mlir
index 14691e73e62d7..64b3328b70ab4 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/dpp.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/dpp.mlir
@@ -137,3 +137,11 @@ func.func @row_bcast_update_dpp_f16(%arg0: f16, %arg1: f16) -> f16 {
%0 = amdgpu.dpp %arg0 %arg1 row_bcast_15 { bound_ctrl = true } : f16
return %0 : f16
}
+
+func.func @dpp_row_share(%arg0: i32, %arg1: i32) -> i32 {
+ // CHECK-LABEL: func @dpp_row_share
+ // CHECK: rocdl.update.dpp %arg0, %arg1 with 351, 15, 15, false : i32
+ // CHECK: return %0 : i32
+ %0 = amdgpu.dpp %arg0 %arg1 row_share ( 0xf : i32 ) : i32
+ return %0 : i32
+}
More information about the Mlir-commits
mailing list