[Mlir-commits] [mlir] [ROCDL] Added rocdl.fmed3 -> Intrinsic::amdgcn_fmed3 (PR #159332)
Keshav Vinayak Jha
llvmlistbot at llvm.org
Wed Sep 17 04:46:59 PDT 2025
https://github.com/keshavvinayak01 created https://github.com/llvm/llvm-project/pull/159332
## Description
Added support for the AMDGPU floating-point median-of-three (`fmed3`) intrinsic in the MLIR ROCDL dialect. This patch adds a `rocdl.med3` operation that is translated to the `llvm.amdgcn.fmed3` LLVM intrinsic; it only touches the MLIR ROCDL dialect definition and its LLVM IR translation test.
## Testing
- 4 Lit test files in `llvm/test/CodeGen/AMDGPU/`
- ROCDL -> LLVM IR lit tests for the new `rocdl.med3` op (f16 and f32 cases) in `mlir/test/Target/LLVMIR/rocdl.mlir`
Addresses [#157052](https://github.com/llvm/llvm-project/issues/157052)
>From 285f5027e39bac2570c914fb5fb96d2658f559f0 Mon Sep 17 00:00:00 2001
From: keshavvinayak01 <keshavvinayakjha at gmail.com>
Date: Wed, 17 Sep 2025 11:40:49 +0000
Subject: [PATCH] Added fmed3 rocdl op
Signed-off-by: keshavvinayak01 <keshavvinayakjha at gmail.com>
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 20 ++++++++++++++++++++
mlir/test/Target/LLVMIR/rocdl.mlir | 14 ++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 9fa3ec1fc4b21..1d31ec069b5c0 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -1291,6 +1291,26 @@ def ROCDL_CvtScaleF32PkFp4F32Op :
}];
}
+//===----------------------------------------------------------------------===//
+// MED3 operations
+//===----------------------------------------------------------------------===//
+
+def ROCDL_Med3Op : ROCDL_ConcreteNonMemIntrOp<"med3", [Pure, AllTypesMatch<["res", "src0", "src1", "src2"]>], 1>, // `rocdl.med3`: Pure op; the result and all three operands must share one type.
+ Arguments<(ins LLVM_ScalarOrVectorOf<LLVM_AnyFloat>:$src0, // First of the three values.
+ LLVM_ScalarOrVectorOf<LLVM_AnyFloat>:$src1, // Second of the three values.
+ LLVM_ScalarOrVectorOf<LLVM_AnyFloat>:$src2)> { // Third value. NOTE(review): LLVM_AnyFloat (scalar or vector) is broader than the f16/f32 scalars mentioned in the summary and exercised by the tests; confirm amdgcn_fmed3 accepts every type allowed here.
+ let results = (outs LLVM_ScalarOrVectorOf<LLVM_AnyFloat>:$res); // Single result, same type as the operands (enforced by AllTypesMatch above).
+ let summary = "Median of three float/half values";
+ let assemblyFormat = [{
+ $src0 `,` $src1 `,` $src2 attr-dict `:` `(` type($src0) `,` type($src1) `,` type($src2) `)` `->` type($res)
+ }]; // Example form: rocdl.med3 %a, %b, %c : (f32, f32, f32) -> f32
+ string llvmBuilder = [{
+ $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_fmed3,
+ {$src0, $src1, $src2},
+ {moduleTranslation.convertType(op.getRes().getType())});
+ }]; // Translation emits a call to the amdgcn_fmed3 intrinsic with the three operands, passing the converted result type in the type list.
+}
+
//===----------------------------------------------------------------------===//
// ROCDL target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index a464358250c38..579669f646ceb 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1298,6 +1298,20 @@ llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
llvm.return %ret : i32
}
+llvm.func @test_med3_f16(%arg0: f16, %arg1: f16, %arg2: f16) -> f16 { // Translation test: rocdl.med3 applied to three f16 arguments.
+ // CHECK-LABEL: define half @test_med3_f16(half %0, half %1, half %2)
+ %0 = rocdl.med3 %arg0, %arg1, %arg2 : (f16, f16, f16) -> f16 // Expected to translate to the llvm.amdgcn.fmed3.f16 call matched below.
+ llvm.return %0 : f16
+ // CHECK: call half @llvm.amdgcn.fmed3.f16(half %0, half %1, half %2)
+}
+
+llvm.func @test_med3_f32(%arg0: f32, %arg1: f32, %arg2: f32) -> f32 { // Translation test: rocdl.med3 applied to three f32 arguments.
+ // CHECK-LABEL: define float @test_med3_f32(float %0, float %1, float %2)
+ %0 = rocdl.med3 %arg0, %arg1, %arg2 : (f32, f32, f32) -> f32 // Expected to translate to the llvm.amdgcn.fmed3.f32 call matched below.
+ llvm.return %0 : f32
+ // CHECK: call float @llvm.amdgcn.fmed3.f32(float %0, float %1, float %2)
+}
+
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
More information about the Mlir-commits
mailing list